Annotation of qemu/roms/ipxe/src/drivers/net/ipoib.c, revision 1.1.1.1

1.1       root        1: /*
                      2:  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
                      3:  *
                      4:  * This program is free software; you can redistribute it and/or
                      5:  * modify it under the terms of the GNU General Public License as
                      6:  * published by the Free Software Foundation; either version 2 of the
                      7:  * License, or any later version.
                      8:  *
                      9:  * This program is distributed in the hope that it will be useful, but
                     10:  * WITHOUT ANY WARRANTY; without even the implied warranty of
                     11:  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
                     12:  * General Public License for more details.
                     13:  *
                     14:  * You should have received a copy of the GNU General Public License
                     15:  * along with this program; if not, write to the Free Software
                     16:  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
                     17:  */
                     18: 
                     19: FILE_LICENCE ( GPL2_OR_LATER );
                     20: 
                     21: #include <stdint.h>
                     22: #include <stdio.h>
                     23: #include <unistd.h>
                     24: #include <string.h>
                     25: #include <byteswap.h>
                     26: #include <errno.h>
                     27: #include <ipxe/errortab.h>
                     28: #include <ipxe/if_arp.h>
                     29: #include <ipxe/iobuf.h>
                     30: #include <ipxe/netdevice.h>
                     31: #include <ipxe/infiniband.h>
                     32: #include <ipxe/ib_pathrec.h>
                     33: #include <ipxe/ib_mcast.h>
                     34: #include <ipxe/ipoib.h>
                     35: 
                     36: /** @file
                     37:  *
                     38:  * IP over Infiniband
                     39:  */
                     40: 
                     41: /** Number of IPoIB send work queue entries */
                     42: #define IPOIB_NUM_SEND_WQES 2
                     43: 
                     44: /** Number of IPoIB receive work queue entries */
                     45: #define IPOIB_NUM_RECV_WQES 4
                     46: 
                     47: /** Number of IPoIB completion entries */
                     48: #define IPOIB_NUM_CQES 8
                     49: 
                     50: /** An IPoIB device */
                     51: struct ipoib_device {
                     52:        /** Network device */
                     53:        struct net_device *netdev;
                     54:        /** Underlying Infiniband device */
                     55:        struct ib_device *ibdev;
                     56:        /** Completion queue */
                     57:        struct ib_completion_queue *cq;
                     58:        /** Queue pair */
                     59:        struct ib_queue_pair *qp;
                     60:        /** Broadcast MAC */
                     61:        struct ipoib_mac broadcast;
                     62:        /** Joined to IPv4 broadcast multicast group
                     63:         *
                     64:         * This flag indicates whether or not we have initiated the
                     65:         * join to the IPv4 broadcast multicast group.
                     66:         */
                     67:        int broadcast_joined;
                     68:        /** IPv4 broadcast multicast group membership */
                     69:        struct ib_mc_membership broadcast_membership;
                     70: };
                     71: 
                     72: /** Broadcast IPoIB address */
                     73: static struct ipoib_mac ipoib_broadcast = {
                     74:        .flags__qpn = htonl ( IB_QPN_BROADCAST ),
                     75:        .gid.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
                     76:                       0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
                     77: };
                     78: 
                     79: /** Link status for "broadcast join in progress" */
                     80: #define EINPROGRESS_JOINING __einfo_error ( EINFO_EINPROGRESS_JOINING )
                     81: #define EINFO_EINPROGRESS_JOINING __einfo_uniqify \
                     82:        ( EINFO_EINPROGRESS, 0x01, "Joining" )
                     83: 
                     84: /** Human-readable message for the link status */
                     85: struct errortab ipoib_errors[] __errortab = {
                     86:        __einfo_errortab ( EINFO_EINPROGRESS_JOINING ),
                     87: };
                     88: 
                     89: /****************************************************************************
                     90:  *
                     91:  * IPoIB peer cache
                     92:  *
                     93:  ****************************************************************************
                     94:  */
                     95: 
                     96: /**
                     97:  * IPoIB peer address
                     98:  *
                     99:  * The IPoIB link-layer header is only four bytes long and so does not
                    100:  * have sufficient room to store IPoIB MAC address(es).  We therefore
                    101:  * maintain a cache of MAC addresses identified by a single-byte key,
                    102:  * and abuse the spare two bytes within the link-layer header to
                    103:  * communicate these MAC addresses between the link-layer code and the
                    104:  * netdevice driver.
                    105:  */
                    106: struct ipoib_peer {
                    107:        /** Key */
                    108:        uint8_t key;
                    109:        /** MAC address */
                    110:        struct ipoib_mac mac;
                    111: };
                    112: 
                    113: /** Number of IPoIB peer cache entries
                    114:  *
                    115:  * Must be a power of two.
                    116:  */
                    117: #define IPOIB_NUM_CACHED_PEERS 4
                    118: 
                    119: /** IPoIB peer address cache */
                    120: static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];
                    121: 
                    122: /** Oldest IPoIB peer cache entry index */
                    123: static unsigned int ipoib_peer_cache_idx = 1;
                    124: 
                    125: /**
                    126:  * Look up cached peer by key
                    127:  *
                    128:  * @v key              Peer cache key
                    129:  * @ret peer           Peer cache entry, or NULL
                    130:  */
                    131: static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
                    132:        struct ipoib_peer *peer;
                    133:        unsigned int i;
                    134: 
                    135:        for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
                    136:                peer = &ipoib_peer_cache[i];
                    137:                if ( peer->key == key )
                    138:                        return peer;
                    139:        }
                    140: 
                    141:        if ( key != 0 ) {
                    142:                DBG ( "IPoIB warning: peer cache lost track of key %x while "
                    143:                      "still in use\n", key );
                    144:        }
                    145:        return NULL;
                    146: }
                    147: 
                    148: /**
                    149:  * Store GID and QPN in peer cache
                    150:  *
                    151:  * @v mac              Peer MAC address
                    152:  * @ret peer           Peer cache entry
                    153:  */
                    154: static struct ipoib_peer * ipoib_cache_peer ( const struct ipoib_mac *mac ) {
                    155:        struct ipoib_peer *peer;
                    156:        unsigned int key;
                    157:        unsigned int i;
                    158: 
                    159:        /* Look for existing cache entry */
                    160:        for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
                    161:                peer = &ipoib_peer_cache[i];
                    162:                if ( memcmp ( &peer->mac, mac, sizeof ( peer->mac ) ) == 0 )
                    163:                        return peer;
                    164:        }
                    165: 
                    166:        /* No entry found: create a new one */
                    167:        key = ipoib_peer_cache_idx++;
                    168:        peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
                    169:        if ( peer->key )
                    170:                DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
                    171: 
                    172:        memset ( peer, 0, sizeof ( *peer ) );
                    173:        peer->key = key;
                    174:        memcpy ( &peer->mac, mac, sizeof ( peer->mac ) );
                    175:        DBG ( "IPoIB peer %x has MAC %s\n",
                    176:              peer->key, ipoib_ntoa ( &peer->mac ) );
                    177:        return peer;
                    178: }
                    179: 
                    180: /****************************************************************************
                    181:  *
                    182:  * IPoIB link layer
                    183:  *
                    184:  ****************************************************************************
                    185:  */
                    186: 
                    187: /**
                    188:  * Add IPoIB link-layer header
                    189:  *
                    190:  * @v netdev           Network device
                    191:  * @v iobuf            I/O buffer
                    192:  * @v ll_dest          Link-layer destination address
                    193:  * @v ll_source                Source link-layer address
                    194:  * @v net_proto                Network-layer protocol, in network-byte order
                    195:  * @ret rc             Return status code
                    196:  */
                    197: static int ipoib_push ( struct net_device *netdev __unused,
                    198:                        struct io_buffer *iobuf, const void *ll_dest,
                    199:                        const void *ll_source __unused, uint16_t net_proto ) {
                    200:        struct ipoib_hdr *ipoib_hdr =
                    201:                iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
                    202:        const struct ipoib_mac *dest_mac = ll_dest;
                    203:        const struct ipoib_mac *src_mac = ll_source;
                    204:        struct ipoib_peer *dest;
                    205:        struct ipoib_peer *src;
                    206: 
                    207:        /* Add link-layer addresses to cache */
                    208:        dest = ipoib_cache_peer ( dest_mac );
                    209:        src = ipoib_cache_peer ( src_mac );
                    210: 
                    211:        /* Build IPoIB header */
                    212:        ipoib_hdr->proto = net_proto;
                    213:        ipoib_hdr->u.peer.dest = dest->key;
                    214:        ipoib_hdr->u.peer.src = src->key;
                    215: 
                    216:        return 0;
                    217: }
                    218: 
                    219: /**
                    220:  * Remove IPoIB link-layer header
                    221:  *
                    222:  * @v netdev           Network device
                    223:  * @v iobuf            I/O buffer
                    224:  * @ret ll_dest                Link-layer destination address
                    225:  * @ret ll_source      Source link-layer address
                    226:  * @ret net_proto      Network-layer protocol, in network-byte order
                    227:  * @ret rc             Return status code
                    228:  */
                    229: static int ipoib_pull ( struct net_device *netdev,
                    230:                        struct io_buffer *iobuf, const void **ll_dest,
                    231:                        const void **ll_source, uint16_t *net_proto ) {
                    232:        struct ipoib_device *ipoib = netdev->priv;
                    233:        struct ipoib_hdr *ipoib_hdr = iobuf->data;
                    234:        struct ipoib_peer *dest;
                    235:        struct ipoib_peer *source;
                    236: 
                    237:        /* Sanity check */
                    238:        if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
                    239:                DBG ( "IPoIB packet too short for link-layer header\n" );
                    240:                DBG_HD ( iobuf->data, iob_len ( iobuf ) );
                    241:                return -EINVAL;
                    242:        }
                    243: 
                    244:        /* Strip off IPoIB header */
                    245:        iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
                    246: 
                    247:        /* Identify source and destination addresses, and clear
                    248:         * reserved word in IPoIB header
                    249:         */
                    250:        dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
                    251:        source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
                    252:        ipoib_hdr->u.reserved = 0;
                    253: 
                    254:        /* Fill in required fields */
                    255:        *ll_dest = ( dest ? &dest->mac : &ipoib->broadcast );
                    256:        *ll_source = ( source ? &source->mac : &ipoib->broadcast );
                    257:        *net_proto = ipoib_hdr->proto;
                    258: 
                    259:        return 0;
                    260: }
                    261: 
                    262: /**
                    263:  * Initialise IPoIB link-layer address
                    264:  *
                    265:  * @v hw_addr          Hardware address
                    266:  * @v ll_addr          Link-layer address
                    267:  */
                    268: static void ipoib_init_addr ( const void *hw_addr, void *ll_addr ) {
                    269:        const union ib_guid *guid = hw_addr;
                    270:        struct ipoib_mac *mac = ll_addr;
                    271: 
                    272:        memset ( mac, 0, sizeof ( *mac ) );
                    273:        memcpy ( &mac->gid.s.guid, guid, sizeof ( mac->gid.s.guid ) );
                    274: }
                    275: 
                    276: /**
                    277:  * Transcribe IPoIB link-layer address
                    278:  *
                    279:  * @v ll_addr  Link-layer address
                    280:  * @ret string Link-layer address in human-readable format
                    281:  */
                    282: const char * ipoib_ntoa ( const void *ll_addr ) {
                    283:        static char buf[45];
                    284:        const struct ipoib_mac *mac = ll_addr;
                    285: 
                    286:        snprintf ( buf, sizeof ( buf ), "%08x:%08x:%08x:%08x:%08x",
                    287:                   htonl ( mac->flags__qpn ), htonl ( mac->gid.dwords[0] ),
                    288:                   htonl ( mac->gid.dwords[1] ),
                    289:                   htonl ( mac->gid.dwords[2] ),
                    290:                   htonl ( mac->gid.dwords[3] ) );
                    291:        return buf;
                    292: }
                    293: 
                    294: /**
                    295:  * Hash multicast address
                    296:  *
                    297:  * @v af               Address family
                    298:  * @v net_addr         Network-layer address
                    299:  * @v ll_addr          Link-layer address to fill in
                    300:  * @ret rc             Return status code
                    301:  */
                    302: static int ipoib_mc_hash ( unsigned int af __unused,
                    303:                           const void *net_addr __unused,
                    304:                           void *ll_addr __unused ) {
                    305: 
                    306:        return -ENOTSUP;
                    307: }
                    308: 
                    309: /**
                    310:  * Generate Mellanox Ethernet-compatible compressed link-layer address
                    311:  *
                    312:  * @v ll_addr          Link-layer address
                    313:  * @v eth_addr         Ethernet-compatible address to fill in
                    314:  */
                    315: static int ipoib_mlx_eth_addr ( const union ib_guid *guid,
                    316:                                uint8_t *eth_addr ) {
                    317:        eth_addr[0] = ( ( guid->bytes[3] == 2 ) ? 0x00 : 0x02 );
                    318:        eth_addr[1] = guid->bytes[1];
                    319:        eth_addr[2] = guid->bytes[2];
                    320:        eth_addr[3] = guid->bytes[5];
                    321:        eth_addr[4] = guid->bytes[6];
                    322:        eth_addr[5] = guid->bytes[7];
                    323:        return 0;
                    324: }
                    325: 
                    326: /** An IPoIB Ethernet-compatible compressed link-layer address generator */
                    327: struct ipoib_eth_addr_handler {
                    328:        /** GUID byte 1 */
                    329:        uint8_t byte1;
                    330:        /** GUID byte 2 */
                    331:        uint8_t byte2;
                    332:        /** Handler */
                    333:        int ( * eth_addr ) ( const union ib_guid *guid,
                    334:                             uint8_t *eth_addr );
                    335: };
                    336: 
                    337: /** IPoIB Ethernet-compatible compressed link-layer address generators */
                    338: static struct ipoib_eth_addr_handler ipoib_eth_addr_handlers[] = {
                    339:        { 0x02, 0xc9, ipoib_mlx_eth_addr },
                    340: };
                    341: 
                    342: /**
                    343:  * Generate Ethernet-compatible compressed link-layer address
                    344:  *
                    345:  * @v ll_addr          Link-layer address
                    346:  * @v eth_addr         Ethernet-compatible address to fill in
                    347:  */
                    348: static int ipoib_eth_addr ( const void *ll_addr, void *eth_addr ) {
                    349:        const struct ipoib_mac *ipoib_addr = ll_addr;
                    350:        const union ib_guid *guid = &ipoib_addr->gid.s.guid;
                    351:        struct ipoib_eth_addr_handler *handler;
                    352:        unsigned int i;
                    353: 
                    354:        for ( i = 0 ; i < ( sizeof ( ipoib_eth_addr_handlers ) /
                    355:                            sizeof ( ipoib_eth_addr_handlers[0] ) ) ; i++ ) {
                    356:                handler = &ipoib_eth_addr_handlers[i];
                    357:                if ( ( handler->byte1 == guid->bytes[1] ) &&
                    358:                     ( handler->byte2 == guid->bytes[2] ) ) {
                    359:                        return handler->eth_addr ( guid, eth_addr );
                    360:                }
                    361:        }
                    362:        return -ENOTSUP;
                    363: }
                    364: 
                    365: /** IPoIB protocol */
                    366: struct ll_protocol ipoib_protocol __ll_protocol = {
                    367:        .name           = "IPoIB",
                    368:        .ll_proto       = htons ( ARPHRD_INFINIBAND ),
                    369:        .hw_addr_len    = sizeof ( union ib_guid ),
                    370:        .ll_addr_len    = IPOIB_ALEN,
                    371:        .ll_header_len  = IPOIB_HLEN,
                    372:        .push           = ipoib_push,
                    373:        .pull           = ipoib_pull,
                    374:        .init_addr      = ipoib_init_addr,
                    375:        .ntoa           = ipoib_ntoa,
                    376:        .mc_hash        = ipoib_mc_hash,
                    377:        .eth_addr       = ipoib_eth_addr,
                    378: };
                    379: 
                    380: /**
                    381:  * Allocate IPoIB device
                    382:  *
                    383:  * @v priv_size                Size of driver private data
                    384:  * @ret netdev         Network device, or NULL
                    385:  */
                    386: struct net_device * alloc_ipoibdev ( size_t priv_size ) {
                    387:        struct net_device *netdev;
                    388: 
                    389:        netdev = alloc_netdev ( priv_size );
                    390:        if ( netdev ) {
                    391:                netdev->ll_protocol = &ipoib_protocol;
                    392:                netdev->ll_broadcast = ( uint8_t * ) &ipoib_broadcast;
                    393:                netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;
                    394:        }
                    395:        return netdev;
                    396: }
                    397: 
                    398: /****************************************************************************
                    399:  *
                    400:  * IPoIB network device
                    401:  *
                    402:  ****************************************************************************
                    403:  */
                    404: 
                    405: /**
                    406:  * Transmit packet via IPoIB network device
                    407:  *
                    408:  * @v netdev           Network device
                    409:  * @v iobuf            I/O buffer
                    410:  * @ret rc             Return status code
                    411:  */
                    412: static int ipoib_transmit ( struct net_device *netdev,
                    413:                            struct io_buffer *iobuf ) {
                    414:        struct ipoib_device *ipoib = netdev->priv;
                    415:        struct ib_device *ibdev = ipoib->ibdev;
                    416:        struct ipoib_hdr *ipoib_hdr;
                    417:        struct ipoib_peer *dest;
                    418:        struct ib_address_vector av;
                    419:        int rc;
                    420: 
                    421:        /* Sanity check */
                    422:        if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
                    423:                DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
                    424:                return -EINVAL;
                    425:        }
                    426:        ipoib_hdr = iobuf->data;
                    427: 
                    428:        /* Attempting transmission while link is down will put the
                    429:         * queue pair into an error state, so don't try it.
                    430:         */
                    431:        if ( ! ib_link_ok ( ibdev ) )
                    432:                return -ENETUNREACH;
                    433: 
                    434:        /* Identify destination address */
                    435:        dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
                    436:        if ( ! dest )
                    437:                return -ENXIO;
                    438:        ipoib_hdr->u.reserved = 0;
                    439: 
                    440:        /* Construct address vector */
                    441:        memset ( &av, 0, sizeof ( av ) );
                    442:        av.qpn = ( ntohl ( dest->mac.flags__qpn ) & IB_QPN_MASK );
                    443:        av.gid_present = 1;
                    444:        memcpy ( &av.gid, &dest->mac.gid, sizeof ( av.gid ) );
                    445:        if ( ( rc = ib_resolve_path ( ibdev, &av ) ) != 0 ) {
                    446:                /* Path not resolved yet */
                    447:                return rc;
                    448:        }
                    449: 
                    450:        return ib_post_send ( ibdev, ipoib->qp, &av, iobuf );
                    451: }
                    452: 
                    453: /**
                    454:  * Handle IPoIB send completion
                    455:  *
                    456:  * @v ibdev            Infiniband device
                    457:  * @v qp               Queue pair
                    458:  * @v iobuf            I/O buffer
                    459:  * @v rc               Completion status code
                    460:  */
                    461: static void ipoib_complete_send ( struct ib_device *ibdev __unused,
                    462:                                  struct ib_queue_pair *qp,
                    463:                                  struct io_buffer *iobuf, int rc ) {
                    464:        struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
                    465: 
                    466:        netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
                    467: }
                    468: 
                    469: /**
                    470:  * Handle IPoIB receive completion
                    471:  *
                    472:  * @v ibdev            Infiniband device
                    473:  * @v qp               Queue pair
                    474:  * @v av               Address vector, or NULL
                    475:  * @v iobuf            I/O buffer
                    476:  * @v rc               Completion status code
                    477:  */
                    478: static void ipoib_complete_recv ( struct ib_device *ibdev __unused,
                    479:                                  struct ib_queue_pair *qp,
                    480:                                  struct ib_address_vector *av,
                    481:                                  struct io_buffer *iobuf, int rc ) {
                    482:        struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
                    483:        struct net_device *netdev = ipoib->netdev;
                    484:        struct ipoib_hdr *ipoib_hdr;
                    485:        struct ipoib_mac ll_src;
                    486:        struct ipoib_peer *src;
                    487: 
                    488:        /* Record errors */
                    489:        if ( rc != 0 ) {
                    490:                netdev_rx_err ( netdev, iobuf, rc );
                    491:                return;
                    492:        }
                    493: 
                    494:        /* Sanity check */
                    495:        if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
                    496:                DBGC ( ipoib, "IPoIB %p received packet too short to "
                    497:                       "contain IPoIB header\n", ipoib );
                    498:                DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
                    499:                netdev_rx_err ( netdev, iobuf, -EIO );
                    500:                return;
                    501:        }
                    502:        ipoib_hdr = iobuf->data;
                    503:        if ( ! av ) {
                    504:                DBGC ( ipoib, "IPoIB %p received packet without address "
                    505:                       "vector\n", ipoib );
                    506:                netdev_rx_err ( netdev, iobuf, -ENOTTY );
                    507:                return;
                    508:        }
                    509: 
                    510:        /* Parse source address */
                    511:        if ( av->gid_present ) {
                    512:                ll_src.flags__qpn = htonl ( av->qpn );
                    513:                memcpy ( &ll_src.gid, &av->gid, sizeof ( ll_src.gid ) );
                    514:                src = ipoib_cache_peer ( &ll_src );
                    515:                ipoib_hdr->u.peer.src = src->key;
                    516:        }
                    517: 
                    518:        /* Hand off to network layer */
                    519:        netdev_rx ( netdev, iobuf );
                    520: }
                    521: 
                    522: /** IPoIB completion operations */
                    523: static struct ib_completion_queue_operations ipoib_cq_op = {
                    524:        .complete_send = ipoib_complete_send,
                    525:        .complete_recv = ipoib_complete_recv,
                    526: };
                    527: 
                    528: /**
                    529:  * Poll IPoIB network device
                    530:  *
                    531:  * @v netdev           Network device
                    532:  */
                    533: static void ipoib_poll ( struct net_device *netdev ) {
                    534:        struct ipoib_device *ipoib = netdev->priv;
                    535:        struct ib_device *ibdev = ipoib->ibdev;
                    536: 
                    537:        ib_poll_eq ( ibdev );
                    538: }
                    539: 
                    540: /**
                    541:  * Handle IPv4 broadcast multicast group join completion
                    542:  *
                    543:  * @v ibdev            Infiniband device
                    544:  * @v qp               Queue pair
                    545:  * @v membership       Multicast group membership
                    546:  * @v rc               Status code
                    547:  * @v mad              Response MAD (or NULL on error)
                    548:  */
                    549: void ipoib_join_complete ( struct ib_device *ibdev __unused,
                    550:                           struct ib_queue_pair *qp __unused,
                    551:                           struct ib_mc_membership *membership, int rc,
                    552:                           union ib_mad *mad __unused ) {
                    553:        struct ipoib_device *ipoib = container_of ( membership,
                    554:                                   struct ipoib_device, broadcast_membership );
                    555: 
                    556:        /* Record join status as link status */
                    557:        netdev_link_err ( ipoib->netdev, rc );
                    558: }
                    559: 
                    560: /**
                    561:  * Join IPv4 broadcast multicast group
                    562:  *
                    563:  * @v ipoib            IPoIB device
                    564:  * @ret rc             Return status code
                    565:  */
                    566: static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
                    567:        int rc;
                    568: 
                    569:        if ( ( rc = ib_mcast_join ( ipoib->ibdev, ipoib->qp,
                    570:                                    &ipoib->broadcast_membership,
                    571:                                    &ipoib->broadcast.gid,
                    572:                                    ipoib_join_complete ) ) != 0 ) {
                    573:                DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
                    574:                       ipoib, strerror ( rc ) );
                    575:                return rc;
                    576:        }
                    577:        ipoib->broadcast_joined = 1;
                    578: 
                    579:        return 0;
                    580: }
                    581: 
                    582: /**
                    583:  * Leave IPv4 broadcast multicast group
                    584:  *
                    585:  * @v ipoib            IPoIB device
                    586:  */
                    587: static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
                    588: 
                    589:        if ( ipoib->broadcast_joined ) {
                    590:                ib_mcast_leave ( ipoib->ibdev, ipoib->qp,
                    591:                                 &ipoib->broadcast_membership );
                    592:                ipoib->broadcast_joined = 0;
                    593:        }
                    594: }
                    595: 
                    596: /**
                    597:  * Handle link status change
                    598:  *
                    599:  * @v ibdev            Infiniband device
                    600:  */
                    601: static void ipoib_link_state_changed ( struct ib_device *ibdev ) {
                    602:        struct net_device *netdev = ib_get_ownerdata ( ibdev );
                    603:        struct ipoib_device *ipoib = netdev->priv;
                    604:        struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
                    605:        int rc;
                    606: 
                    607:        /* Leave existing broadcast group */
                    608:        ipoib_leave_broadcast_group ( ipoib );
                    609: 
                    610:        /* Update MAC address based on potentially-new GID prefix */
                    611:        memcpy ( &mac->gid.s.prefix, &ibdev->gid.s.prefix,
                    612:                 sizeof ( mac->gid.s.prefix ) );
                    613: 
                    614:        /* Update broadcast GID based on potentially-new partition key */
                    615:        ipoib->broadcast.gid.words[2] =
                    616:                htons ( ibdev->pkey | IB_PKEY_FULL );
                    617: 
                    618:        /* Set net device link state to reflect Infiniband link state */
                    619:        rc = ib_link_rc ( ibdev );
                    620:        netdev_link_err ( netdev, ( rc ? rc : -EINPROGRESS_JOINING ) );
                    621: 
                    622:        /* Join new broadcast group */
                    623:        if ( ib_is_open ( ibdev ) && ib_link_ok ( ibdev ) &&
                    624:             ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) ) {
                    625:                DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
                    626:                       "%s\n", ipoib, strerror ( rc ) );
                    627:                netdev_link_err ( netdev, rc );
                    628:                return;
                    629:        }
                    630: }
                    631: 
                    632: /**
                    633:  * Open IPoIB network device
                    634:  *
                    635:  * @v netdev           Network device
                    636:  * @ret rc             Return status code
                    637:  */
                    638: static int ipoib_open ( struct net_device *netdev ) {
                    639:        struct ipoib_device *ipoib = netdev->priv;
                    640:        struct ib_device *ibdev = ipoib->ibdev;
                    641:        struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
                    642:        int rc;
                    643: 
                    644:        /* Open IB device */
                    645:        if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
                    646:                DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
                    647:                       ipoib, strerror ( rc ) );
                    648:                goto err_ib_open;
                    649:        }
                    650: 
                    651:        /* Allocate completion queue */
                    652:        ipoib->cq = ib_create_cq ( ibdev, IPOIB_NUM_CQES, &ipoib_cq_op );
                    653:        if ( ! ipoib->cq ) {
                    654:                DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
                    655:                       ipoib );
                    656:                rc = -ENOMEM;
                    657:                goto err_create_cq;
                    658:        }
                    659: 
                    660:        /* Allocate queue pair */
                    661:        ipoib->qp = ib_create_qp ( ibdev, IB_QPT_UD,
                    662:                                   IPOIB_NUM_SEND_WQES, ipoib->cq,
                    663:                                   IPOIB_NUM_RECV_WQES, ipoib->cq );
                    664:        if ( ! ipoib->qp ) {
                    665:                DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
                    666:                       ipoib );
                    667:                rc = -ENOMEM;
                    668:                goto err_create_qp;
                    669:        }
                    670:        ib_qp_set_ownerdata ( ipoib->qp, ipoib );
                    671: 
                    672:        /* Update MAC address with QPN */
                    673:        mac->flags__qpn = htonl ( ipoib->qp->qpn );
                    674: 
                    675:        /* Fill receive rings */
                    676:        ib_refill_recv ( ibdev, ipoib->qp );
                    677: 
                    678:        /* Fake a link status change to join the broadcast group */
                    679:        ipoib_link_state_changed ( ibdev );
                    680: 
                    681:        return 0;
                    682: 
                    683:        ib_destroy_qp ( ibdev, ipoib->qp );
                    684:  err_create_qp:
                    685:        ib_destroy_cq ( ibdev, ipoib->cq );
                    686:  err_create_cq:
                    687:        ib_close ( ibdev );
                    688:  err_ib_open:
                    689:        return rc;
                    690: }
                    691: 
                    692: /**
                    693:  * Close IPoIB network device
                    694:  *
                    695:  * @v netdev           Network device
                    696:  */
                    697: static void ipoib_close ( struct net_device *netdev ) {
                    698:        struct ipoib_device *ipoib = netdev->priv;
                    699:        struct ib_device *ibdev = ipoib->ibdev;
                    700:        struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
                    701: 
                    702:        /* Leave broadcast group */
                    703:        ipoib_leave_broadcast_group ( ipoib );
                    704: 
                    705:        /* Remove QPN from MAC address */
                    706:        mac->flags__qpn = 0;
                    707: 
                    708:        /* Tear down the queues */
                    709:        ib_destroy_qp ( ibdev, ipoib->qp );
                    710:        ib_destroy_cq ( ibdev, ipoib->cq );
                    711: 
                    712:        /* Close IB device */
                    713:        ib_close ( ibdev );
                    714: }
                    715: 
                    716: /** IPoIB network device operations */
                    717: static struct net_device_operations ipoib_operations = {
                    718:        .open           = ipoib_open,
                    719:        .close          = ipoib_close,
                    720:        .transmit       = ipoib_transmit,
                    721:        .poll           = ipoib_poll,
                    722: };
                    723: 
                    724: /**
                    725:  * Probe IPoIB device
                    726:  *
                    727:  * @v ibdev            Infiniband device
                    728:  * @ret rc             Return status code
                    729:  */
                    730: static int ipoib_probe ( struct ib_device *ibdev ) {
                    731:        struct net_device *netdev;
                    732:        struct ipoib_device *ipoib;
                    733:        int rc;
                    734: 
                    735:        /* Allocate network device */
                    736:        netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
                    737:        if ( ! netdev )
                    738:                return -ENOMEM;
                    739:        netdev_init ( netdev, &ipoib_operations );
                    740:        ipoib = netdev->priv;
                    741:        ib_set_ownerdata ( ibdev, netdev );
                    742:        netdev->dev = ibdev->dev;
                    743:        memset ( ipoib, 0, sizeof ( *ipoib ) );
                    744:        ipoib->netdev = netdev;
                    745:        ipoib->ibdev = ibdev;
                    746: 
                    747:        /* Extract hardware address */
                    748:        memcpy ( netdev->hw_addr, &ibdev->gid.s.guid,
                    749:                 sizeof ( ibdev->gid.s.guid ) );
                    750: 
                    751:        /* Set default broadcast address */
                    752:        memcpy ( &ipoib->broadcast, &ipoib_broadcast,
                    753:                 sizeof ( ipoib->broadcast ) );
                    754:        netdev->ll_broadcast = ( ( uint8_t * ) &ipoib->broadcast );
                    755: 
                    756:        /* Register network device */
                    757:        if ( ( rc = register_netdev ( netdev ) ) != 0 )
                    758:                goto err_register_netdev;
                    759: 
                    760:        return 0;
                    761: 
                    762:  err_register_netdev:
                    763:        netdev_nullify ( netdev );
                    764:        netdev_put ( netdev );
                    765:        return rc;
                    766: }
                    767: 
                    768: /**
                    769:  * Remove IPoIB device
                    770:  *
                    771:  * @v ibdev            Infiniband device
                    772:  */
                    773: static void ipoib_remove ( struct ib_device *ibdev ) {
                    774:        struct net_device *netdev = ib_get_ownerdata ( ibdev );
                    775: 
                    776:        unregister_netdev ( netdev );
                    777:        netdev_nullify ( netdev );
                    778:        netdev_put ( netdev );
                    779: }
                    780: 
                    781: /** IPoIB driver */
                    782: struct ib_driver ipoib_driver __ib_driver = {
                    783:        .name = "IPoIB",
                    784:        .probe = ipoib_probe,
                    785:        .notify = ipoib_link_state_changed,
                    786:        .remove = ipoib_remove,
                    787: };

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.