|
|
1.1 root 1: /*
2: * Copyright (C) 2009 Michael Brown <[email protected]>.
3: *
4: * This program is free software; you can redistribute it and/or
5: * modify it under the terms of the GNU General Public License as
6: * published by the Free Software Foundation; either version 2 of the
7: * License, or any later version.
8: *
9: * This program is distributed in the hope that it will be useful, but
10: * WITHOUT ANY WARRANTY; without even the implied warranty of
11: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12: * General Public License for more details.
13: *
14: * You should have received a copy of the GNU General Public License
15: * along with this program; if not, write to the Free Software
16: * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17: */
18:
19: FILE_LICENCE ( GPL2_OR_LATER );
20:
21: #include <stdint.h>
22: #include <stdlib.h>
23: #include <string.h>
24: #include <byteswap.h>
25: #include <errno.h>
26: #include <assert.h>
27: #include <ipxe/infiniband.h>
28: #include <ipxe/ib_mi.h>
29: #include <ipxe/ib_pathrec.h>
30: #include <ipxe/ib_cm.h>
31:
32: /**
33: * @file
34: *
35: * Infiniband communication management
36: *
37: */
38:
39: /** List of connections */
40: static LIST_HEAD ( ib_cm_conns );
41:
42: /**
43: * Find connection by local communication ID
44: *
45: * @v local_id Local communication ID
46: * @ret conn Connection, or NULL
47: */
48: static struct ib_connection * ib_cm_find ( uint32_t local_id ) {
49: struct ib_connection *conn;
50:
51: list_for_each_entry ( conn, &ib_cm_conns, list ) {
52: if ( conn->local_id == local_id )
53: return conn;
54: }
55: return NULL;
56: }
57:
58: /**
59: * Send "ready to use" response
60: *
61: * @v ibdev Infiniband device
62: * @v mi Management interface
63: * @v av Address vector
64: * @v local_id Local communication ID
65: * @v remote_id Remote communication ID
66: * @ret rc Return status code
67: */
68: static int ib_cm_send_rtu ( struct ib_device *ibdev,
69: struct ib_mad_interface *mi,
70: struct ib_address_vector *av,
71: uint32_t local_id, uint32_t remote_id ) {
72: union ib_mad mad;
73: struct ib_cm_ready_to_use *rtu = &mad.cm.cm_data.ready_to_use;
74: int rc;
75:
76: /* Construct "ready to use" response */
77: memset ( &mad, 0, sizeof ( mad ) );
78: mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
79: mad.hdr.class_version = IB_CM_CLASS_VERSION;
80: mad.hdr.method = IB_MGMT_METHOD_SEND;
81: mad.hdr.attr_id = htons ( IB_CM_ATTR_READY_TO_USE );
82: rtu->local_id = htonl ( local_id );
83: rtu->remote_id = htonl ( remote_id );
84: if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
85: DBG ( "CM could not send RTU: %s\n", strerror ( rc ) );
86: return rc;
87: }
88:
89: return 0;
90: }
91:
92: /**
93: * Handle duplicate connection replies
94: *
95: * @v ibdev Infiniband device
96: * @v mi Management interface
97: * @v mad Received MAD
98: * @v av Source address vector
99: * @ret rc Return status code
100: *
101: * If a "ready to use" MAD is lost, the peer may resend the connection
102: * reply. We have to respond to these with duplicate "ready to use"
103: * MADs, otherwise the peer may time out and drop the connection.
104: */
105: static void ib_cm_recv_rep ( struct ib_device *ibdev,
106: struct ib_mad_interface *mi,
107: union ib_mad *mad,
108: struct ib_address_vector *av ) {
109: struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
110: struct ib_connection *conn;
111: uint32_t local_id = ntohl ( rep->remote_id );
112: int rc;
113:
114: /* Identify connection */
115: conn = ib_cm_find ( local_id );
116: if ( conn ) {
117: /* Try to send "ready to use" reply */
118: if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
119: conn->remote_id ) ) != 0 ) {
120: /* Ignore errors; the remote end will retry */
121: }
122: } else {
123: DBG ( "CM unidentified connection %08x\n", local_id );
124: }
125: }
126:
127: /**
128: * Send reply to disconnection request
129: *
130: * @v ibdev Infiniband device
131: * @v mi Management interface
132: * @v av Address vector
133: * @v local_id Local communication ID
134: * @v remote_id Remote communication ID
135: * @ret rc Return status code
136: */
137: static int ib_cm_send_drep ( struct ib_device *ibdev,
138: struct ib_mad_interface *mi,
139: struct ib_address_vector *av,
140: uint32_t local_id, uint32_t remote_id ) {
141: union ib_mad mad;
142: struct ib_cm_disconnect_reply *drep = &mad.cm.cm_data.disconnect_reply;
143: int rc;
144:
145: /* Construct reply to disconnection request */
146: memset ( &mad, 0, sizeof ( mad ) );
147: mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
148: mad.hdr.class_version = IB_CM_CLASS_VERSION;
149: mad.hdr.method = IB_MGMT_METHOD_SEND;
150: mad.hdr.attr_id = htons ( IB_CM_ATTR_DISCONNECT_REPLY );
151: drep->local_id = htonl ( local_id );
152: drep->remote_id = htonl ( remote_id );
153: if ( ( rc = ib_mi_send ( ibdev, mi, &mad, av ) ) != 0 ){
154: DBG ( "CM could not send DREP: %s\n", strerror ( rc ) );
155: return rc;
156: }
157:
158: return 0;
159: }
160:
161: /**
162: * Handle disconnection requests
163: *
164: * @v ibdev Infiniband device
165: * @v mi Management interface
166: * @v mad Received MAD
167: * @v av Source address vector
168: * @ret rc Return status code
169: */
170: static void ib_cm_recv_dreq ( struct ib_device *ibdev,
171: struct ib_mad_interface *mi,
172: union ib_mad *mad,
173: struct ib_address_vector *av ) {
174: struct ib_cm_disconnect_request *dreq =
175: &mad->cm.cm_data.disconnect_request;
176: struct ib_connection *conn;
177: uint32_t local_id = ntohl ( dreq->remote_id );
178: uint32_t remote_id = ntohl ( dreq->local_id );
179: int rc;
180:
181: /* Identify connection */
182: conn = ib_cm_find ( local_id );
183: if ( conn ) {
184: /* Notify upper layer */
185: conn->op->changed ( ibdev, conn->qp, conn, -ENOTCONN,
186: &dreq->private_data,
187: sizeof ( dreq->private_data ) );
188: } else {
189: DBG ( "CM unidentified connection %08x\n", local_id );
190: }
191:
192: /* Send reply */
193: if ( ( rc = ib_cm_send_drep ( ibdev, mi, av, local_id,
194: remote_id ) ) != 0 ) {
195: /* Ignore errors; the remote end will retry */
196: }
197: };
198:
199: /** Communication management agents */
200: struct ib_mad_agent ib_cm_agent[] __ib_mad_agent = {
201: {
202: .mgmt_class = IB_MGMT_CLASS_CM,
203: .class_version = IB_CM_CLASS_VERSION,
204: .attr_id = htons ( IB_CM_ATTR_CONNECT_REPLY ),
205: .handle = ib_cm_recv_rep,
206: },
207: {
208: .mgmt_class = IB_MGMT_CLASS_CM,
209: .class_version = IB_CM_CLASS_VERSION,
210: .attr_id = htons ( IB_CM_ATTR_DISCONNECT_REQUEST ),
211: .handle = ib_cm_recv_dreq,
212: },
213: };
214:
215: /**
216: * Convert connection rejection reason to return status code
217: *
218: * @v reason Rejection reason (in network byte order)
219: * @ret rc Return status code
220: */
221: static int ib_cm_rejection_reason_to_rc ( uint16_t reason ) {
222: switch ( reason ) {
223: case htons ( IB_CM_REJECT_BAD_SERVICE_ID ) :
224: return -ENODEV;
225: case htons ( IB_CM_REJECT_STALE_CONN ) :
226: return -EALREADY;
227: case htons ( IB_CM_REJECT_CONSUMER ) :
228: return -ENOTTY;
229: default:
230: return -EPERM;
231: }
232: }
233:
234: /**
235: * Handle connection request transaction completion
236: *
237: * @v ibdev Infiniband device
238: * @v mi Management interface
239: * @v madx Management transaction
240: * @v rc Status code
241: * @v mad Received MAD (or NULL on error)
242: * @v av Source address vector (or NULL on error)
243: */
244: static void ib_cm_req_complete ( struct ib_device *ibdev,
245: struct ib_mad_interface *mi,
246: struct ib_mad_transaction *madx,
247: int rc, union ib_mad *mad,
248: struct ib_address_vector *av ) {
249: struct ib_connection *conn = ib_madx_get_ownerdata ( madx );
250: struct ib_queue_pair *qp = conn->qp;
251: struct ib_cm_common *common = &mad->cm.cm_data.common;
252: struct ib_cm_connect_reply *rep = &mad->cm.cm_data.connect_reply;
253: struct ib_cm_connect_reject *rej = &mad->cm.cm_data.connect_reject;
254: void *private_data = NULL;
255: size_t private_data_len = 0;
256:
257: /* Report failures */
258: if ( ( rc == 0 ) && ( mad->hdr.status != htons ( IB_MGMT_STATUS_OK ) ))
259: rc = -EIO;
260: if ( rc != 0 ) {
261: DBGC ( conn, "CM %p connection request failed: %s\n",
262: conn, strerror ( rc ) );
263: goto out;
264: }
265:
266: /* Record remote communication ID */
267: conn->remote_id = ntohl ( common->local_id );
268:
269: /* Handle response */
270: switch ( mad->hdr.attr_id ) {
271:
272: case htons ( IB_CM_ATTR_CONNECT_REPLY ) :
273: /* Extract fields */
274: qp->av.qpn = ( ntohl ( rep->local_qpn ) >> 8 );
275: qp->send.psn = ( ntohl ( rep->starting_psn ) >> 8 );
276: private_data = &rep->private_data;
277: private_data_len = sizeof ( rep->private_data );
278: DBGC ( conn, "CM %p connected to QPN %lx PSN %x\n",
279: conn, qp->av.qpn, qp->send.psn );
280:
281: /* Modify queue pair */
282: if ( ( rc = ib_modify_qp ( ibdev, qp ) ) != 0 ) {
283: DBGC ( conn, "CM %p could not modify queue pair: %s\n",
284: conn, strerror ( rc ) );
285: goto out;
286: }
287:
288: /* Send "ready to use" reply */
289: if ( ( rc = ib_cm_send_rtu ( ibdev, mi, av, conn->local_id,
290: conn->remote_id ) ) != 0 ) {
291: /* Treat as non-fatal */
292: rc = 0;
293: }
294: break;
295:
296: case htons ( IB_CM_ATTR_CONNECT_REJECT ) :
297: /* Extract fields */
298: DBGC ( conn, "CM %p connection rejected (reason %d)\n",
299: conn, ntohs ( rej->reason ) );
300: /* Private data is valid only for a Consumer Reject */
301: if ( rej->reason == htons ( IB_CM_REJECT_CONSUMER ) ) {
302: private_data = &rej->private_data;
303: private_data_len = sizeof ( rej->private_data );
304: }
305: rc = ib_cm_rejection_reason_to_rc ( rej->reason );
306: break;
307:
308: default:
309: DBGC ( conn, "CM %p unexpected response (attribute %04x)\n",
310: conn, ntohs ( mad->hdr.attr_id ) );
311: rc = -ENOTSUP;
312: break;
313: }
314:
315: out:
316: /* Destroy the completed transaction */
317: ib_destroy_madx ( ibdev, ibdev->gsi, madx );
318: conn->madx = NULL;
319:
320: /* Hand off to the upper completion handler */
321: conn->op->changed ( ibdev, qp, conn, rc, private_data,
322: private_data_len );
323: }
324:
325: /** Connection request operations */
326: static struct ib_mad_transaction_operations ib_cm_req_op = {
327: .complete = ib_cm_req_complete,
328: };
329:
330: /**
331: * Handle connection path transaction completion
332: *
333: * @v ibdev Infiniband device
334: * @v path Path
335: * @v rc Status code
336: * @v av Address vector, or NULL on error
337: */
338: static void ib_cm_path_complete ( struct ib_device *ibdev,
339: struct ib_path *path, int rc,
340: struct ib_address_vector *av ) {
341: struct ib_connection *conn = ib_path_get_ownerdata ( path );
342: struct ib_queue_pair *qp = conn->qp;
343: union ib_mad mad;
344: struct ib_cm_connect_request *req = &mad.cm.cm_data.connect_request;
345: size_t private_data_len;
346:
347: /* Report failures */
348: if ( rc != 0 ) {
349: DBGC ( conn, "CM %p path lookup failed: %s\n",
350: conn, strerror ( rc ) );
351: conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
352: goto out;
353: }
354:
355: /* Update queue pair peer path */
356: memcpy ( &qp->av, av, sizeof ( qp->av ) );
357:
358: /* Construct connection request */
359: memset ( &mad, 0, sizeof ( mad ) );
360: mad.hdr.mgmt_class = IB_MGMT_CLASS_CM;
361: mad.hdr.class_version = IB_CM_CLASS_VERSION;
362: mad.hdr.method = IB_MGMT_METHOD_SEND;
363: mad.hdr.attr_id = htons ( IB_CM_ATTR_CONNECT_REQUEST );
364: req->local_id = htonl ( conn->local_id );
365: memcpy ( &req->service_id, &conn->service_id,
366: sizeof ( req->service_id ) );
367: memcpy ( &req->local_ca, &ibdev->node_guid, sizeof ( req->local_ca ) );
368: req->local_qpn__responder_resources = htonl ( ( qp->qpn << 8 ) | 1 );
369: req->local_eecn__initiator_depth = htonl ( ( 0 << 8 ) | 1 );
370: req->remote_eecn__remote_timeout__service_type__ee_flow_ctrl =
371: htonl ( ( 0x14 << 3 ) | ( IB_CM_TRANSPORT_RC << 1 ) |
372: ( 0 << 0 ) );
373: req->starting_psn__local_timeout__retry_count =
374: htonl ( ( qp->recv.psn << 8 ) | ( 0x14 << 3 ) |
375: ( 0x07 << 0 ) );
376: req->pkey = htons ( ibdev->pkey );
377: req->payload_mtu__rdc_exists__rnr_retry =
378: ( ( IB_MTU_2048 << 4 ) | ( 1 << 3 ) | ( 0x07 << 0 ) );
379: req->max_cm_retries__srq = ( ( 0x0f << 4 ) | ( 0 << 3 ) );
380: req->primary.local_lid = htons ( ibdev->lid );
381: req->primary.remote_lid = htons ( conn->qp->av.lid );
382: memcpy ( &req->primary.local_gid, &ibdev->gid,
383: sizeof ( req->primary.local_gid ) );
384: memcpy ( &req->primary.remote_gid, &conn->qp->av.gid,
385: sizeof ( req->primary.remote_gid ) );
386: req->primary.flow_label__rate =
387: htonl ( ( 0 << 12 ) | ( conn->qp->av.rate << 0 ) );
388: req->primary.hop_limit = 0;
389: req->primary.sl__subnet_local =
390: ( ( conn->qp->av.sl << 4 ) | ( 1 << 3 ) );
391: req->primary.local_ack_timeout = ( 0x13 << 3 );
392: private_data_len = conn->private_data_len;
393: if ( private_data_len > sizeof ( req->private_data ) )
394: private_data_len = sizeof ( req->private_data );
395: memcpy ( &req->private_data, &conn->private_data, private_data_len );
396:
397: /* Create connection request */
398: av->qpn = IB_QPN_GSI;
399: av->qkey = IB_QKEY_GSI;
400: conn->madx = ib_create_madx ( ibdev, ibdev->gsi, &mad, av,
401: &ib_cm_req_op );
402: if ( ! conn->madx ) {
403: DBGC ( conn, "CM %p could not create connection request\n",
404: conn );
405: conn->op->changed ( ibdev, qp, conn, rc, NULL, 0 );
406: goto out;
407: }
408: ib_madx_set_ownerdata ( conn->madx, conn );
409:
410: out:
411: /* Destroy the completed transaction */
412: ib_destroy_path ( ibdev, path );
413: conn->path = NULL;
414: }
415:
416: /** Connection path operations */
417: static struct ib_path_operations ib_cm_path_op = {
418: .complete = ib_cm_path_complete,
419: };
420:
421: /**
422: * Create connection to remote QP
423: *
424: * @v ibdev Infiniband device
425: * @v qp Queue pair
426: * @v dgid Target GID
427: * @v service_id Target service ID
428: * @v private_data Connection request private data
429: * @v private_data_len Length of connection request private data
430: * @v op Connection operations
431: * @ret conn Connection
432: */
433: struct ib_connection *
434: ib_create_conn ( struct ib_device *ibdev, struct ib_queue_pair *qp,
435: union ib_gid *dgid, union ib_guid *service_id,
436: void *private_data, size_t private_data_len,
437: struct ib_connection_operations *op ) {
438: struct ib_connection *conn;
439:
440: /* Allocate and initialise request */
441: conn = zalloc ( sizeof ( *conn ) + private_data_len );
442: if ( ! conn )
443: goto err_alloc_conn;
444: conn->ibdev = ibdev;
445: conn->qp = qp;
446: memset ( &qp->av, 0, sizeof ( qp->av ) );
447: qp->av.gid_present = 1;
448: memcpy ( &qp->av.gid, dgid, sizeof ( qp->av.gid ) );
449: conn->local_id = random();
450: memcpy ( &conn->service_id, service_id, sizeof ( conn->service_id ) );
451: conn->op = op;
452: conn->private_data_len = private_data_len;
453: memcpy ( &conn->private_data, private_data, private_data_len );
454:
455: /* Create path */
456: conn->path = ib_create_path ( ibdev, &qp->av, &ib_cm_path_op );
457: if ( ! conn->path )
458: goto err_create_path;
459: ib_path_set_ownerdata ( conn->path, conn );
460:
461: /* Add to list of connections */
462: list_add ( &conn->list, &ib_cm_conns );
463:
464: DBGC ( conn, "CM %p created for IBDEV %p QPN %lx\n",
465: conn, ibdev, qp->qpn );
466: DBGC ( conn, "CM %p connecting to " IB_GID_FMT " " IB_GUID_FMT "\n",
467: conn, IB_GID_ARGS ( dgid ), IB_GUID_ARGS ( service_id ) );
468:
469: return conn;
470:
471: ib_destroy_path ( ibdev, conn->path );
472: err_create_path:
473: free ( conn );
474: err_alloc_conn:
475: return NULL;
476: }
477:
478: /**
479: * Destroy connection to remote QP
480: *
481: * @v ibdev Infiniband device
482: * @v qp Queue pair
483: * @v conn Connection
484: */
485: void ib_destroy_conn ( struct ib_device *ibdev,
486: struct ib_queue_pair *qp __unused,
487: struct ib_connection *conn ) {
488:
489: list_del ( &conn->list );
490: if ( conn->madx )
491: ib_destroy_madx ( ibdev, ibdev->gsi, conn->madx );
492: if ( conn->path )
493: ib_destroy_path ( ibdev, conn->path );
494: free ( conn );
495: }
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.