|
|
1.1 root 1: /************************************************* -*- linux-c -*-
2: * Myricom 10Gb Network Interface Card Software
3: * Copyright 2009, Myricom, Inc.
4: *
5: * This program is free software; you can redistribute it and/or
6: * modify it under the terms of the GNU General Public License,
7: * version 2, as published by the Free Software Foundation.
8: *
9: * This program is distributed in the hope that it will be useful,
10: * but WITHOUT ANY WARRANTY; without even the implied warranty of
11: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12: * GNU General Public License for more details.
13: *
14: * You should have received a copy of the GNU General Public License
15: * along with this program; if not, write to the Free Software
16: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17: ****************************************************************/
18:
19: FILE_LICENCE ( GPL2_ONLY );
20:
21: /*
22: * Author: Glenn Brown <[email protected]>
23: */
24:
25: /*
26: * General Theory of Operation
27: *
28: * This is a minimal Myricom 10 gigabit Ethernet driver for network
29: * boot.
30: *
31: * Initialization
32: *
33: * myri10ge_pci_probe() is called by iPXE during initialization.
34: * Minimal NIC initialization is performed to minimize resources
35: * consumed when the driver is resident but unused.
36: *
37: * Network Boot
38: *
39: * myri10ge_net_open() is called by iPXE before attempting to network
40: * boot from the card. Packet buffers are allocated and the NIC
41: * interface is initialized.
42: *
43: * Transmit
44: *
45: * myri10ge_net_transmit() enqueues frames for transmission by writing
46: * descriptors to the NIC's tx ring. For simplicity and to avoid
47: * copies, we always have the NIC DMA up the packet. The sent I/O
48: * buffer is released once the NIC signals myri10ge_interrupt_handler()
49: * that the send has completed.
50: *
51: * Receive
52: *
53: * Receives are posted to the NIC's receive ring. The NIC fills a
54: * DMAable receive_completion ring with completion notifications.
55: * myri10ge_net_poll() polls for these receive notifications, posts
56: * replacement receive buffers to the NIC, and passes received frames
57: * to netdev_rx().
58: *
59: * NonVolatile Storage
60: *
61: * This driver supports NonVolatile Storage (nvs) in the NIC EEPROM.
62: * If the last EEPROM block is not otherwise filled, we tell
63: * iPXE it may store NonVolatile Options (nvo) there.
64: */
65:
66: /*
67: * Debugging levels:
68: * - DBG() is for any errors, i.e. failed alloc_iob(), malloc_dma(),
69: * TX overflow, corrupted packets, ...
70: * - DBG2() is for successful events, like packet received,
71: * packet transmitted, and other general notifications.
72: * - DBGP() prints the name of each called function on entry
73: */
74:
75: #include <stdint.h>
76:
77: #include <byteswap.h>
78: #include <errno.h>
79: #include <ipxe/ethernet.h>
80: #include <ipxe/if_ether.h>
81: #include <ipxe/iobuf.h>
82: #include <ipxe/malloc.h>
83: #include <ipxe/netdevice.h>
84: #include <ipxe/nvo.h>
85: #include <ipxe/nvs.h>
86: #include <ipxe/pci.h>
87: #include <ipxe/timer.h>
88:
89: #include "myri10ge_mcp.h"
90:
91: /****************************************************************
92: * Forward declarations
93: ****************************************************************/
94:
/* PCI driver entry points. */

static int myri10ge_pci_probe ( struct pci_device *pci );
static void myri10ge_pci_remove ( struct pci_device *pci );

/* Network device operations. */

static int myri10ge_net_open ( struct net_device *netdev );
static void myri10ge_net_close ( struct net_device *netdev );
static int myri10ge_net_transmit ( struct net_device *netdev,
				   struct io_buffer *iobuf );
static void myri10ge_net_poll ( struct net_device *netdev );
static void myri10ge_net_irq ( struct net_device *netdev, int enable );
107:
108: /****************************************************************
109: * Constants
110: ****************************************************************/
111:
/*
 * Largest valid index of each host-side ring.  Ring indices are
 * wrapped by ANDing them with these values, so each must be of the
 * form 2**N-1.
 */

#define MYRI10GE_TRANSMIT_WRAP			0x01U
#define MYRI10GE_RECEIVE_WRAP			0x07U
#define MYRI10GE_RECEIVE_COMPLETION_WRAP	0x1fU
117:
118: /****************************************************************
119: * Driver internal data types.
120: ****************************************************************/
121:
122: /* Structure holding all DMA buffers for a NIC, which we will
123: allocated as contiguous read/write DMAable memory when the NIC is
124: initialized. */
125:
126: struct myri10ge_dma_buffers
127: {
128: /* The NIC DMAs receive completion notifications into this ring */
129:
130: mcp_slot_t receive_completion[1+MYRI10GE_RECEIVE_COMPLETION_WRAP];
131:
132: /* Interrupt details are DMAd here before interrupting. */
133:
134: mcp_irq_data_t irq_data; /* 64B */
135:
136: /* NIC command completion status is DMAd here. */
137:
138: mcp_cmd_response_t command_response; /* 8B */
139: };
140:
141: struct myri10ge_private
142: {
143: /* Interrupt support */
144:
145: uint32 *irq_claim; /* in NIC SRAM */
146: uint32 *irq_deassert; /* in NIC SRAM */
147:
148: /* DMA buffers. */
149:
150: struct myri10ge_dma_buffers *dma;
151:
152: /*
153: * Transmit state.
154: *
155: * The counts here are uint32 for easy comparison with
156: * priv->dma->irq_data.send_done_count and with each other.
157: */
158:
159: mcp_kreq_ether_send_t *transmit_ring; /* in NIC SRAM */
160: uint32 transmit_ring_wrap;
161: uint32 transmits_posted;
162: uint32 transmits_done;
163: struct io_buffer *transmit_iob[1 + MYRI10GE_TRANSMIT_WRAP];
164:
165: /*
166: * Receive state.
167: */
168:
169: mcp_kreq_ether_recv_t *receive_post_ring; /* in NIC SRAM */
170: unsigned int receive_post_ring_wrap;
171: unsigned int receives_posted;
172: unsigned int receives_done;
173: struct io_buffer *receive_iob[1 + MYRI10GE_RECEIVE_WRAP];
174:
175: /* Address for writing commands to the firmware.
176: BEWARE: the value must be written 32 bits at a time. */
177:
178: mcp_cmd_t *command;
179:
180: /*
181: * Nonvolatile Storage for configuration options.
182: */
183:
184: struct nvs_device nvs;
185: struct nvo_block nvo;
186: unsigned int nvo_registered;
187:
188: /* Cached PCI capability locations. */
189:
190: uint8 pci_cap_vs;
191: };
192:
193: /****************************************************************
194: * Driver internal functions.
195: ****************************************************************/
196:
/* Debug aid: print the transmit and receive done/posted counters
   together with the name of the calling function.  Use this only
   after one of the printed values has changed. */

#define DBG2_RINGS( priv )						\
	DBG2 ( "tx %x/%x rx %x/%x in %s() \n",				\
	       ( priv )->transmits_done,				\
	       ( priv )->transmits_posted,				\
	       ( priv )->receives_done,					\
	       ( priv )->receives_posted,				\
	       __FUNCTION__ )
205:
206: /*
207: * Return a pointer to the driver private data for a network device.
208: *
209: * @v netdev Network device created by this driver.
210: * @ret priv The corresponding driver private data.
211: */
212: static inline struct myri10ge_private *myri10ge_priv ( struct net_device *nd )
213: {
214: /* Our private data always follows the network device in memory,
215: since we use alloc_netdev() to allocate the storage. */
216:
217: return ( struct myri10ge_private * ) ( nd + 1 );
218: }
219:
220: /*
221: * Convert a Myri10ge driver private data pointer to a netdev pointer.
222: *
223: * @v p Myri10ge device private data.
224: * @ret r The corresponding network device.
225: */
226: static inline struct net_device *myri10ge_netdev ( struct myri10ge_private *p )
227: {
228: return ( ( struct net_device * ) p ) - 1;
229: }
230:
231: /*
232: * Convert a network device pointer to a PCI device pointer.
233: *
234: * @v netdev A Network Device.
235: * @ret r The corresponding PCI device.
236: */
237: static inline struct pci_device *myri10ge_pcidev ( struct net_device *netdev )
238: {
239: return container_of (netdev->dev, struct pci_device, dev);
240: }
241:
242: /*
243: * Pass a receive buffer to the NIC to be filled.
244: *
245: * @v priv The network device to receive the buffer.
246: * @v iob The I/O buffer to fill.
247: *
248: * Receive buffers are filled in FIFO order.
249: */
250: static void myri10ge_post_receive ( struct myri10ge_private *priv,
251: struct io_buffer *iob )
252: {
253: unsigned int receives_posted;
254: mcp_kreq_ether_recv_t *request;
255:
256: /* Record the posted I/O buffer, to be passed to netdev_rx() on
257: receive. */
258:
259: receives_posted = priv->receives_posted;
260: priv->receive_iob[receives_posted & MYRI10GE_RECEIVE_WRAP] = iob;
261:
262: /* Post the receive. */
263:
264: request = &priv->receive_post_ring[receives_posted
265: & priv->receive_post_ring_wrap];
266: request->addr_high = 0;
267: wmb();
268: request->addr_low = htonl ( virt_to_bus ( iob->data ) );
269: priv->receives_posted = ++receives_posted;
270: }
271:
272: /*
273: * Execute a command on the NIC.
274: *
275: * @v priv NIC to perform the command.
276: * @v cmd The command to perform.
277: * @v data I/O copy buffer for parameters/results
278: * @ret rc 0 on success, else an error code.
279: */
280: static int myri10ge_command ( struct myri10ge_private *priv,
281: uint32 cmd,
282: uint32 data[3] )
283: {
284: int i;
285: mcp_cmd_t *command;
286: uint32 result;
287: unsigned int slept_ms;
288: volatile mcp_cmd_response_t *response;
289:
290: DBGP ( "myri10ge_command ( ,%d, ) \n", cmd );
291: command = priv->command;
292: response = &priv->dma->command_response;
293:
294: /* Mark the command as incomplete. */
295:
296: response->result = 0xFFFFFFFF;
297:
298: /* Pass the command to the NIC. */
299:
300: command->cmd = htonl ( cmd );
301: command->data0 = htonl ( data[0] );
302: command->data1 = htonl ( data[1] );
303: command->data2 = htonl ( data[2] );
304: command->response_addr.high = 0;
305: command->response_addr.low
306: = htonl ( virt_to_bus ( &priv->dma->command_response ) );
307: for ( i=0; i<36; i+=4 )
308: * ( uint32 * ) &command->pad[i] = 0;
309: wmb();
310: * ( uint32 * ) &command->pad[36] = 0;
311:
312: /* Wait up to 2 seconds for a response. */
313:
314: for ( slept_ms=0; slept_ms<2000; slept_ms++ ) {
315: result = response->result;
316: if ( result == 0 ) {
317: data[0] = ntohl ( response->data );
318: return 0;
319: } else if ( result != 0xFFFFFFFF ) {
320: DBG ( "cmd%d:0x%x\n",
321: cmd,
322: ntohl ( response->result ) );
323: return -EIO;
324: }
325: udelay ( 1000 );
326: rmb();
327: }
328: DBG ( "cmd%d:timed out\n", cmd );
329: return -ETIMEDOUT;
330: }
331:
332: /*
333: * Handle any pending interrupt.
334: *
335: * @v netdev Device being polled for interrupts.
336: *
337: * This is called periodically to let the driver check for interrupts.
338: */
339: static void myri10ge_interrupt_handler ( struct net_device *netdev )
340: {
341: struct myri10ge_private *priv;
342: mcp_irq_data_t *irq_data;
343: uint8 valid;
344:
345: priv = myri10ge_priv ( netdev );
346: irq_data = &priv->dma->irq_data;
347:
348: /* Return if there was no interrupt. */
349:
350: rmb();
351: valid = irq_data->valid;
352: if ( !valid )
353: return;
354: DBG2 ( "irq " );
355:
356: /* Tell the NIC to deassert the interrupt and clear
357: irq_data->valid.*/
358:
359: *priv->irq_deassert = 0; /* any value is OK. */
360: mb();
361:
362: /* Handle any new receives. */
363:
364: if ( valid & 1 ) {
365:
366: /* Pass the receive interrupt token back to the NIC. */
367:
368: DBG2 ( "rx " );
369: *priv->irq_claim = htonl ( 3 );
370: wmb();
371: }
372:
373: /* Handle any sent packet by freeing its I/O buffer, now that
374: we know it has been DMAd. */
375:
376: if ( valid & 2 ) {
377: unsigned int nic_done_count;
378:
379: DBG2 ( "snt " );
380: nic_done_count = ntohl ( priv->dma->irq_data.send_done_count );
381: while ( priv->transmits_done != nic_done_count ) {
382: struct io_buffer *iob;
383:
384: iob = priv->transmit_iob [priv->transmits_done
385: & MYRI10GE_TRANSMIT_WRAP];
386: DBG2 ( "%p ", iob );
387: netdev_tx_complete ( netdev, iob );
388: ++priv->transmits_done;
389: }
390: }
391:
392: /* Record any statistics update. */
393:
394: if ( irq_data->stats_updated ) {
395:
396: /* Update the link status. */
397:
398: DBG2 ( "stats " );
399: if ( ntohl ( irq_data->link_up ) == MXGEFW_LINK_UP )
400: netdev_link_up ( netdev );
401: else
402: netdev_link_down ( netdev );
403:
404: /* Ignore all error counters from the NIC. */
405: }
406:
407: /* Wait for the interrupt to be deasserted, as indicated by
408: irq_data->valid, which is set by the NIC after the deassert. */
409:
410: DBG2 ( "wait " );
411: do {
412: mb();
413: } while ( irq_data->valid );
414:
415: /* Claim the interrupt to enable future interrupt generation. */
416:
417: DBG2 ( "claim\n" );
418: * ( priv->irq_claim + 1 ) = htonl ( 3 );
419: mb();
420: }
421:
/* Register offsets and mode values for the Myricom Vendor-Specific
   PCI configuration space capability, used to read the STRING_SPECS
   and to access the EEPROM.  The register macros expect a variable
   named 'vs', holding the capability offset, to be in scope. */

#define VS_EEPROM_READ_ADDR		( vs + 0x04 )
#define VS_EEPROM_READ_DATA		( vs + 0x08 )
#define VS_EEPROM_WRITE			( vs + 0x0C )
#define VS_MODE				( vs + 0x10 )
#define VS_DATA				( vs + 0x14 )
#define VS_ADDR				( vs + 0x18 )

#define VS_MODE_READ32			0x3
#define VS_MODE_LOCATE			0x8
#define VS_MODE_EEPROM_STREAM_WRITE	0xB

#define VS_LOCATE_STRING_SPECS		0x3
435:
436: /*
437: * Read MAC address from its 'string specs' via the vendor-specific
438: * capability. (This capability allows NIC SRAM and ROM to be read
439: * before it is mapped.)
440: *
441: * @v pci The device.
442: * @v vs Offset of the PCI Vendor-Specific Capability.
443: * @v mac Buffer to store the MAC address.
444: * @ret rc Returns 0 on success, else an error code.
445: */
446: static int mac_address_from_string_specs ( struct pci_device *pci,
447: unsigned int vs,
448: uint8 mac[ETH_ALEN] )
449: {
450: char string_specs[256];
451: char *ptr, *limit;
452: char *to = string_specs;
453: uint32 addr;
454: uint32 len;
455: int mac_set = 0;
456:
457: /* Locate the String specs in LANai SRAM. */
458:
459: pci_write_config_byte ( pci, VS_MODE, VS_MODE_LOCATE );
460: pci_write_config_dword ( pci, VS_ADDR, VS_LOCATE_STRING_SPECS );
461: pci_read_config_dword ( pci, VS_ADDR, &addr );
462: pci_read_config_dword ( pci, VS_DATA, &len );
463: DBG2 ( "ss@%x,%x\n", addr, len );
464:
465: /* Copy in the string specs. Use 32-bit reads for performance. */
466:
467: if ( len > sizeof ( string_specs ) || ( len & 3 ) ) {
468: pci_write_config_byte ( pci, VS_MODE, 0 );
469: DBG ( "SS too big\n" );
470: return -ENOTSUP;
471: }
472:
473: pci_write_config_byte ( pci, VS_MODE, VS_MODE_READ32 );
474: while ( len >= 4 ) {
475: uint32 tmp;
476:
477: pci_write_config_byte ( pci, VS_ADDR, addr );
478: pci_read_config_dword ( pci, VS_DATA, &tmp );
479: tmp = ntohl ( tmp );
480: memcpy ( to, &tmp, 4 );
481: to += 4;
482: addr += 4;
483: len -= 4;
484: }
485: pci_write_config_byte ( pci, VS_MODE, 0 );
486:
487: /* Parse the string specs. */
488:
489: DBG2 ( "STRING_SPECS:\n" );
490: ptr = string_specs;
491: limit = string_specs + sizeof ( string_specs );
492: while ( *ptr != '\0' && ptr < limit ) {
493: DBG2 ( "%s\n", ptr );
494: if ( memcmp ( ptr, "MAC=", 4 ) == 0 ) {
495: unsigned int i;
496:
497: ptr += 4;
498: for ( i=0; i<6; i++ ) {
499: if ( ( ptr + 2 ) > limit ) {
500: DBG ( "bad MAC addr\n" );
501: return -ENOTSUP;
502: }
503: mac[i] = strtoul ( ptr, &ptr, 16 );
504: ptr += 1;
505: }
506: mac_set = 1;
507: }
508: else
509: while ( ptr < limit && *ptr++ );
510: }
511:
512: /* Verify we parsed all we need. */
513:
514: if ( !mac_set ) {
515: DBG ( "no MAC addr\n" );
516: return -ENOTSUP;
517: }
518:
519: DBG2 ( "MAC %02x:%02x:%02x:%02x:%02x:%02x\n",
520: mac[0], mac[1], mac[2], mac[3], mac[4], mac[5] );
521:
522: return 0;
523: }
524:
525: /****************************************************************
526: * NonVolatile Storage support
527: ****************************************************************/
528:
529: /*
530: * Fill a buffer with data read from nonvolatile storage.
531: *
532: * @v nvs The NonVolatile Storage device to be read.
533: * @v addr The first NonVolatile Storage address to be read.
534: * @v _buf Pointer to the data buffer to be filled.
535: * @v len The number of bytes to copy.
536: * @ret rc 0 on success, else nonzero.
537: */
538: static int myri10ge_nvs_read ( struct nvs_device *nvs,
539: unsigned int addr,
540: void *_buf,
541: size_t len )
542: {
543: struct myri10ge_private *priv =
544: container_of (nvs, struct myri10ge_private, nvs);
545: struct pci_device *pci = myri10ge_pcidev ( myri10ge_netdev ( priv ) );
546: unsigned int vs = priv->pci_cap_vs;
547: unsigned char *buf = (unsigned char *) _buf;
548: unsigned int data;
549: unsigned int i, j;
550:
551: DBGP ( "myri10ge_nvs_read\n" );
552:
553: /* Issue the first read address. */
554:
555: pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 3, addr>>16 );
556: pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 2, addr>>8 );
557: pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
558: addr++;
559:
560: /* Issue all the reads, and harvest the results every 4th issue. */
561:
562: for ( i=0; i<len; ++i,addr++ ) {
563:
564: /* Issue the next read address, updating only the
565: bytes that need updating. We always update the
566: LSB, which triggers the read. */
567:
568: if ( ( addr & 0xff ) == 0 ) {
569: if ( ( addr & 0xffff ) == 0 ) {
570: pci_write_config_byte ( pci,
571: VS_EEPROM_READ_ADDR + 3,
572: addr >> 16 );
573: }
574: pci_write_config_byte ( pci,
575: VS_EEPROM_READ_ADDR + 2,
576: addr >> 8 );
577: }
578: pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
579:
580: /* If 4 data bytes are available, read them with a single read. */
581:
582: if ( ( i & 3 ) == 3 ) {
583: pci_read_config_dword ( pci,
584: VS_EEPROM_READ_DATA,
585: &data );
586: for ( j=0; j<4; j++ ) {
587: buf[i-j] = data;
588: data >>= 8;
589: }
590: }
591: }
592:
593: /* Harvest any remaining results. */
594:
595: if ( ( i & 3 ) != 0 ) {
596: pci_read_config_dword ( pci, VS_EEPROM_READ_DATA, &data );
597: for ( j=1; j<=(i&3); j++ ) {
598: buf[i-j] = data;
599: data >>= 8;
600: }
601: }
602:
603: DBGP_HDA ( addr - len, _buf, len );
604: return 0;
605: }
606:
607: /*
608: * Write a buffer into nonvolatile storage.
609: *
610: * @v nvs The NonVolatile Storage device to be written.
611: * @v address The NonVolatile Storage address to be written.
612: * @v _buf Pointer to the data to be written.
613: * @v len Length of the buffer to be written.
614: * @ret rc 0 on success, else nonzero.
615: */
616: static int myri10ge_nvs_write ( struct nvs_device *nvs,
617: unsigned int addr,
618: const void *_buf,
619: size_t len )
620: {
621: struct myri10ge_private *priv =
622: container_of (nvs, struct myri10ge_private, nvs);
623: struct pci_device *pci = myri10ge_pcidev ( myri10ge_netdev ( priv ) );
624: unsigned int vs = priv->pci_cap_vs;
625: const unsigned char *buf = (const unsigned char *)_buf;
626: unsigned int i;
627: uint8 verify;
628:
629: DBGP ( "nvs_write " );
630: DBGP_HDA ( addr, _buf, len );
631:
632: /* Start erase of the NonVolatile Options block. */
633:
634: DBGP ( "erasing " );
635: pci_write_config_dword ( pci, VS_EEPROM_WRITE, ( addr << 8 ) | 0xff );
636:
637: /* Wait for erase to complete. */
638:
639: DBGP ( "waiting " );
640: pci_read_config_byte ( pci, VS_EEPROM_READ_DATA, &verify );
641: while ( verify != 0xff ) {
642: pci_write_config_byte ( pci, VS_EEPROM_READ_ADDR + 1, addr );
643: pci_read_config_byte ( pci, VS_EEPROM_READ_DATA, &verify );
644: }
645:
646: /* Write the data one byte at a time. */
647:
648: DBGP ( "writing " );
649: pci_write_config_byte ( pci, VS_MODE, VS_MODE_EEPROM_STREAM_WRITE );
650: pci_write_config_dword ( pci, VS_ADDR, addr );
651: for (i=0; i<len; i++, addr++)
652: pci_write_config_byte ( pci, VS_DATA, buf[i] );
653: pci_write_config_dword ( pci, VS_ADDR, 0xffffffff );
654: pci_write_config_byte ( pci, VS_MODE, 0 );
655:
656: DBGP ( "done\n" );
657: return 0;
658: }
659:
660: /*
661: * Initialize NonVolatile storage support for a device.
662: *
663: * @v priv Device private data for the device.
664: * @ret rc 0 on success, else an error code.
665: */
666:
667: static int myri10ge_nv_init ( struct myri10ge_private *priv )
668: {
669: int rc;
670: struct myri10ge_eeprom_header
671: {
672: uint8 __jump[8];
673: uint32 eeprom_len;
674: uint32 eeprom_segment_len;
675: uint32 mcp1_offset;
676: uint32 mcp2_offset;
677: uint32 version;
678: } hdr;
679: uint32 mcp2_len;
680: unsigned int nvo_fragment_pos;
681:
682: DBGP ( "myri10ge_nv_init\n" );
683:
684: /* Read the EEPROM header, and byteswap the fields we will use.
685: This is safe even though priv->nvs is not yet initialized. */
686:
687: rc = myri10ge_nvs_read ( &priv->nvs, 0, &hdr, sizeof ( hdr ) );
688: if ( rc ) {
689: DBG ( "EEPROM header unreadable\n" );
690: return rc;
691: }
692: hdr.eeprom_len = ntohl ( hdr.eeprom_len );
693: hdr.eeprom_segment_len = ntohl ( hdr.eeprom_segment_len );
694: hdr.mcp2_offset = ntohl ( hdr.mcp2_offset );
695: hdr.version = ntohl ( hdr.version );
696: DBG2 ( "eelen:%xh seglen:%xh mcp2@%xh ver%d\n", hdr.eeprom_len,
697: hdr.eeprom_segment_len, hdr.mcp2_offset, hdr.version );
698:
699: /* If the firmware does not support EEPROM writes, simply return. */
700:
701: if ( hdr.version < 1 ) {
702: DBG ( "No EEPROM write support\n" );
703: return 0;
704: }
705:
706: /* Read the length of MCP2. */
707:
708: rc = myri10ge_nvs_read ( &priv->nvs, hdr.mcp2_offset, &mcp2_len, 4 );
709: mcp2_len = ntohl ( mcp2_len );
710: DBG2 ( "mcp2len:%xh\n", mcp2_len );
711:
712: /* Determine the position of the NonVolatile Options fragment and
713: simply return if it overlaps other data. */
714:
715: nvo_fragment_pos = hdr.eeprom_len - hdr.eeprom_segment_len;
716: if ( hdr.mcp2_offset + mcp2_len > nvo_fragment_pos ) {
717: DBG ( "EEPROM full\n" );
718: return 0;
719: }
720:
721: /* Initilize NonVolatile Storage state. */
722:
723: priv->nvs.word_len_log2 = 0;
724: priv->nvs.size = hdr.eeprom_len;
725: priv->nvs.block_size = hdr.eeprom_segment_len;
726: priv->nvs.read = myri10ge_nvs_read;
727: priv->nvs.write = myri10ge_nvs_write;
728:
729: /* Register the NonVolatile Options storage. */
730:
731: nvo_init ( &priv->nvo,
732: &priv->nvs,
733: nvo_fragment_pos, 0x200,
734: NULL,
735: & myri10ge_netdev (priv) -> refcnt );
736: rc = register_nvo ( &priv->nvo,
737: netdev_settings ( myri10ge_netdev ( priv ) ) );
738: if ( rc ) {
739: DBG ("register_nvo failed");
740: return rc;
741: }
742:
743: priv->nvo_registered = 1;
744: DBG2 ( "NVO supported\n" );
745: return 0;
746: }
747:
748: void
749: myri10ge_nv_fini ( struct myri10ge_private *priv )
750: {
751: /* Simply return if nonvolatile access is not supported. */
752:
753: if ( 0 == priv->nvo_registered )
754: return;
755:
756: unregister_nvo ( &priv->nvo );
757: }
758:
759: /****************************************************************
760: * iPXE PCI Device Driver API functions
761: ****************************************************************/
762:
763: /*
764: * Initialize the PCI device.
765: *
766: * @v pci The device's associated pci_device structure.
767: * @v id The PCI device + vendor id.
768: * @ret rc Returns zero if successfully initialized.
769: *
770: * This function is called very early on, while iPXE is initializing.
771: * This is a iPXE PCI Device Driver API function.
772: */
773: static int myri10ge_pci_probe ( struct pci_device *pci )
774: {
775: static struct net_device_operations myri10ge_operations = {
776: .open = myri10ge_net_open,
777: .close = myri10ge_net_close,
778: .transmit = myri10ge_net_transmit,
779: .poll = myri10ge_net_poll,
780: .irq = myri10ge_net_irq
781: };
782:
783: const char *dbg;
784: int rc;
785: struct net_device *netdev;
786: struct myri10ge_private *priv;
787:
788: DBGP ( "myri10ge_pci_probe: " );
789:
790: netdev = alloc_etherdev ( sizeof ( *priv ) );
791: if ( !netdev ) {
792: rc = -ENOMEM;
793: dbg = "alloc_etherdev";
794: goto abort_with_nothing;
795: }
796:
797: netdev_init ( netdev, &myri10ge_operations );
798: priv = myri10ge_priv ( netdev );
799:
800: pci_set_drvdata ( pci, netdev );
801: netdev->dev = &pci->dev;
802:
803: /* Make sure interrupts are disabled. */
804:
805: myri10ge_net_irq ( netdev, 0 );
806:
807: /* Find the PCI Vendor-Specific capability. */
808:
809: priv->pci_cap_vs = pci_find_capability ( pci , PCI_CAP_ID_VNDR );
810: if ( 0 == priv->pci_cap_vs ) {
811: rc = -ENOTSUP;
812: dbg = "no_vs";
813: goto abort_with_netdev_init;
814: }
815:
816: /* Read the NIC HW address. */
817:
818: rc = mac_address_from_string_specs ( pci,
819: priv->pci_cap_vs,
820: netdev->hw_addr );
821: if ( rc ) {
822: dbg = "mac_from_ss";
823: goto abort_with_netdev_init;
824: }
825: DBGP ( "mac " );
826:
827: /* Enable bus master, etc. */
828:
829: adjust_pci_device ( pci );
830: DBGP ( "pci " );
831:
832: /* Register the initialized network device. */
833:
834: rc = register_netdev ( netdev );
835: if ( rc ) {
836: dbg = "register_netdev";
837: goto abort_with_netdev_init;
838: }
839:
840: /* Initialize NonVolatile Storage support. */
841:
842: rc = myri10ge_nv_init ( priv );
843: if ( rc ) {
844: dbg = "myri10ge_nv_init";
845: goto abort_with_registered_netdev;
846: }
847:
848: DBGP ( "done\n" );
849:
850: return 0;
851:
852: abort_with_registered_netdev:
853: unregister_netdev ( netdev );
854: abort_with_netdev_init:
855: netdev_nullify ( netdev );
856: netdev_put ( netdev );
857: abort_with_nothing:
858: DBG ( "%s:%s\n", dbg, strerror ( rc ) );
859: return rc;
860: }
861:
/*
 * Detach the driver from a PCI device.
 *
 * @v pci	PCI device to remove.
 *
 * Unregisters the NonVolatile Options (if any) and the network device
 * that myri10ge_pci_probe() attached to this PCI device, then drops
 * the network device reference.
 *
 * This is a PCI Device Driver API function.
 */
static void myri10ge_pci_remove ( struct pci_device *pci )
{
	struct net_device *netdev;
	struct myri10ge_private *priv;

	DBGP ( "myri10ge_pci_remove\n" );
	netdev = pci_get_drvdata ( pci );
	priv = myri10ge_priv ( netdev );

	myri10ge_nv_fini ( priv );
	unregister_netdev ( netdev );
	netdev_nullify ( netdev );
	netdev_put ( netdev );
}
881:
882: /****************************************************************
883: * iPXE Network Device Driver Operations
884: ****************************************************************/
885:
886: /*
887: * Close a network device.
888: *
889: * @v netdev Device to close.
890: *
891: * This is a iPXE Network Device Driver API function.
892: */
893: static void myri10ge_net_close ( struct net_device *netdev )
894: {
895: struct myri10ge_private *priv;
896: uint32 data[3];
897:
898: DBGP ( "myri10ge_net_close\n" );
899: priv = myri10ge_priv ( netdev );
900:
901: /* disable interrupts */
902:
903: myri10ge_net_irq ( netdev, 0 );
904:
905: /* Reset the NIC interface, so we won't get any more events from
906: the NIC. */
907:
908: myri10ge_command ( priv, MXGEFW_CMD_RESET, data );
909:
910: /* Free receive buffers that were never filled. */
911:
912: while ( priv->receives_done != priv->receives_posted ) {
913: free_iob ( priv->receive_iob[priv->receives_done
914: & MYRI10GE_RECEIVE_WRAP] );
915: ++priv->receives_done;
916: }
917:
918: /* Release DMAable memory. */
919:
920: free_dma ( priv->dma, sizeof ( *priv->dma ) );
921:
922: /* Erase all state from the open. */
923:
924: memset ( priv, 0, sizeof ( *priv ) );
925:
926: DBG2_RINGS ( priv );
927: }
928:
929: /*
930: * Enable or disable IRQ masking.
931: *
932: * @v netdev Device to control.
933: * @v enable Zero to mask off IRQ, non-zero to enable IRQ.
934: *
935: * This is a iPXE Network Driver API function.
936: */
937: static void myri10ge_net_irq ( struct net_device *netdev, int enable )
938: {
939: struct pci_device *pci_dev;
940: uint16 val;
941:
942: DBGP ( "myri10ge_net_irq\n" );
943: pci_dev = ( struct pci_device * ) netdev->dev;
944:
945: /* Adjust the Interrupt Disable bit in the Command register of the
946: PCI Device. */
947:
948: pci_read_config_word ( pci_dev, PCI_COMMAND, &val );
949: if ( enable )
950: val &= ~PCI_COMMAND_INTX_DISABLE;
951: else
952: val |= PCI_COMMAND_INTX_DISABLE;
953: pci_write_config_word ( pci_dev, PCI_COMMAND, val );
954: }
955:
956: /*
957: * Opens a network device.
958: *
959: * @v netdev Device to be opened.
960: * @ret rc Non-zero if failed to open.
961: *
962: * This enables tx and rx on the device.
963: * This is a iPXE Network Device Driver API function.
964: */
965: static int myri10ge_net_open ( struct net_device *netdev )
966: {
967: const char *dbg; /* printed upon error return */
968: int rc;
969: struct io_buffer *iob;
970: struct myri10ge_private *priv;
971: uint32 data[3];
972: struct pci_device *pci_dev;
973: void *membase;
974:
975: DBGP ( "myri10ge_net_open\n" );
976: priv = myri10ge_priv ( netdev );
977: pci_dev = ( struct pci_device * ) netdev->dev;
978: membase = phys_to_virt ( pci_dev->membase );
979:
980: /* Compute address for passing commands to the firmware. */
981:
982: priv->command = membase + MXGEFW_ETH_CMD;
983:
984: /* Ensure interrupts are disabled. */
985:
986: myri10ge_net_irq ( netdev, 0 );
987:
988: /* Allocate cleared DMAable buffers. */
989:
990: priv->dma = malloc_dma ( sizeof ( *priv->dma ) , 128 );
991: if ( !priv->dma ) {
992: rc = -ENOMEM;
993: dbg = "DMA";
994: goto abort_with_nothing;
995: }
996: memset ( priv->dma, 0, sizeof ( *priv->dma ) );
997:
998: /* Simplify following code. */
999:
1000: #define TRY( prefix, base, suffix ) do { \
1001: rc = myri10ge_command ( priv, \
1002: MXGEFW_ \
1003: ## prefix \
1004: ## base \
1005: ## suffix, \
1006: data ); \
1007: if ( rc ) { \
1008: dbg = #base; \
1009: goto abort_with_dma; \
1010: } \
1011: } while ( 0 )
1012:
1013: /* Send a reset command to the card to see if it is alive,
1014: and to reset its queue state. */
1015:
1016: TRY ( CMD_, RESET , );
1017:
1018: /* Set the interrupt queue size. */
1019:
1020: data[0] = ( (uint32_t)( sizeof ( priv->dma->receive_completion ) )
1021: | MXGEFW_CMD_SET_INTRQ_SIZE_FLAG_NO_STRICT_SIZE_CHECK );
1022: TRY ( CMD_SET_ , INTRQ_SIZE , );
1023:
1024: /* Set the interrupt queue DMA address. */
1025:
1026: data[0] = virt_to_bus ( &priv->dma->receive_completion );
1027: data[1] = 0;
1028: TRY ( CMD_SET_, INTRQ_DMA, );
1029:
1030: /* Get the NIC interrupt claim address. */
1031:
1032: TRY ( CMD_GET_, IRQ_ACK, _OFFSET );
1033: priv->irq_claim = membase + data[0];
1034:
1035: /* Get the NIC interrupt assert address. */
1036:
1037: TRY ( CMD_GET_, IRQ_DEASSERT, _OFFSET );
1038: priv->irq_deassert = membase + data[0];
1039:
1040: /* Disable interrupt coalescing, which is inappropriate for the
1041: minimal buffering we provide. */
1042:
1043: TRY ( CMD_GET_, INTR_COAL, _DELAY_OFFSET );
1044: * ( ( uint32 * ) ( membase + data[0] ) ) = 0;
1045:
1046: /* Set the NIC mac address. */
1047:
1048: data[0] = ( netdev->ll_addr[0] << 24
1049: | netdev->ll_addr[1] << 16
1050: | netdev->ll_addr[2] << 8
1051: | netdev->ll_addr[3] );
1052: data[1] = ( ( netdev->ll_addr[4] << 8 )
1053: | netdev->ll_addr[5] );
1054: TRY ( SET_ , MAC_ADDRESS , );
1055:
1056: /* Enable multicast receives, because some iPXE clients don't work
1057: without multicast. . */
1058:
1059: TRY ( ENABLE_ , ALLMULTI , );
1060:
1061: /* Disable Ethernet flow control, so the NIC cannot deadlock the
1062: network under any circumstances. */
1063:
1064: TRY ( DISABLE_ , FLOW , _CONTROL );
1065:
1066: /* Compute transmit ring sizes. */
1067:
1068: data[0] = 0; /* slice 0 */
1069: TRY ( CMD_GET_, SEND_RING, _SIZE );
1070: priv->transmit_ring_wrap
1071: = data[0] / sizeof ( mcp_kreq_ether_send_t ) - 1;
1072: if ( priv->transmit_ring_wrap
1073: & ( priv->transmit_ring_wrap + 1 ) ) {
1074: rc = -EPROTO;
1075: dbg = "TX_RING";
1076: goto abort_with_dma;
1077: }
1078:
1079: /* Compute receive ring sizes. */
1080:
1081: data[0] = 0; /* slice 0 */
1082: TRY ( CMD_GET_ , RX_RING , _SIZE );
1083: priv->receive_post_ring_wrap = data[0] / sizeof ( mcp_dma_addr_t ) - 1;
1084: if ( priv->receive_post_ring_wrap
1085: & ( priv->receive_post_ring_wrap + 1 ) ) {
1086: rc = -EPROTO;
1087: dbg = "RX_RING";
1088: goto abort_with_dma;
1089: }
1090:
1091: /* Get NIC transmit ring address. */
1092:
1093: data[0] = 0; /* slice 0. */
1094: TRY ( CMD_GET_, SEND, _OFFSET );
1095: priv->transmit_ring = membase + data[0];
1096:
1097: /* Get the NIC receive ring address. */
1098:
1099: data[0] = 0; /* slice 0. */
1100: TRY ( CMD_GET_, SMALL_RX, _OFFSET );
1101: priv->receive_post_ring = membase + data[0];
1102:
1103: /* Set the Nic MTU. */
1104:
1105: data[0] = ETH_FRAME_LEN;
1106: TRY ( CMD_SET_, MTU, );
1107:
1108: /* Tell the NIC our buffer sizes. ( We use only small buffers, so we
1109: set both buffer sizes to the same value, which will force all
1110: received frames to use small buffers. ) */
1111:
1112: data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
1113: TRY ( CMD_SET_, SMALL_BUFFER, _SIZE );
1114: data[0] = MXGEFW_PAD + ETH_FRAME_LEN;
1115: TRY ( CMD_SET_, BIG_BUFFER, _SIZE );
1116:
1117: /* Tell firmware where to DMA IRQ data */
1118:
1119: data[0] = virt_to_bus ( &priv->dma->irq_data );
1120: data[1] = 0;
1121: data[2] = sizeof ( priv->dma->irq_data );
1122: TRY ( CMD_SET_, STATS_DMA_V2, );
1123:
1124: /* Post receives. */
1125:
1126: while ( priv->receives_posted <= MYRI10GE_RECEIVE_WRAP ) {
1127:
1128: /* Reserve 2 extra bytes at the start of packets, since
1129: the firmware always skips the first 2 bytes of the buffer
1130: so TCP headers will be aligned. */
1131:
1132: iob = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
1133: if ( !iob ) {
1134: rc = -ENOMEM;
1135: dbg = "alloc_iob";
1136: goto abort_with_receives_posted;
1137: }
1138: iob_reserve ( iob, MXGEFW_PAD );
1139: myri10ge_post_receive ( priv, iob );
1140: }
1141:
1142: /* Bring up the link. */
1143:
1144: TRY ( CMD_, ETHERNET_UP, );
1145:
1146: DBG2_RINGS ( priv );
1147: return 0;
1148:
1149: abort_with_receives_posted:
1150: while ( priv->receives_posted-- )
1151: free_iob ( priv->receive_iob[priv->receives_posted] );
1152: abort_with_dma:
1153: /* Because the link is not up, we don't have to reset the NIC here. */
1154: free_dma ( priv->dma, sizeof ( *priv->dma ) );
1155: abort_with_nothing:
1156: /* Erase all signs of the failed open. */
1157: memset ( priv, 0, sizeof ( *priv ) );
1158: DBG ( "%s: %s\n", dbg, strerror ( rc ) );
1159: return ( rc );
1160: }
1161:
1162: /*
1163: * This function allows a driver to process events during operation.
1164: *
1165: * @v netdev Device being polled.
1166: *
1167: * This is called periodically by iPXE to let the driver check the status of
1168: * transmitted packets and to allow the driver to check for received packets.
1169: * This is a iPXE Network Device Driver API function.
1170: */
1171: static void myri10ge_net_poll ( struct net_device *netdev )
1172: {
1173: struct io_buffer *iob;
1174: struct io_buffer *replacement;
1175: struct myri10ge_dma_buffers *dma;
1176: struct myri10ge_private *priv;
1177: unsigned int length;
1178: unsigned int orig_receives_posted;
1179:
1180: DBGP ( "myri10ge_net_poll\n" );
1181: priv = myri10ge_priv ( netdev );
1182: dma = priv->dma;
1183:
1184: /* Process any pending interrupt. */
1185:
1186: myri10ge_interrupt_handler ( netdev );
1187:
1188: /* Pass up received frames, but limit ourselves to receives posted
1189: before this function was called, so we cannot livelock if
1190: receives are arriving faster than we process them. */
1191:
1192: orig_receives_posted = priv->receives_posted;
1193: while ( priv->receives_done != orig_receives_posted ) {
1194:
1195: /* Stop if there is no pending receive. */
1196:
1197: length = ntohs ( dma->receive_completion
1198: [priv->receives_done
1199: & MYRI10GE_RECEIVE_COMPLETION_WRAP]
1200: .length );
1201: if ( length == 0 )
1202: break;
1203:
1204: /* Allocate a replacement buffer. If none is available,
1205: stop passing up packets until a buffer is available.
1206:
1207: Reserve 2 extra bytes at the start of packets, since
1208: the firmware always skips the first 2 bytes of the buffer
1209: so TCP headers will be aligned. */
1210:
1211: replacement = alloc_iob ( MXGEFW_PAD + ETH_FRAME_LEN );
1212: if ( !replacement ) {
1213: DBG ( "NO RX BUF\n" );
1214: break;
1215: }
1216: iob_reserve ( replacement, MXGEFW_PAD );
1217:
1218: /* Pass up the received frame. */
1219:
1220: iob = priv->receive_iob[priv->receives_done
1221: & MYRI10GE_RECEIVE_WRAP];
1222: iob_put ( iob, length );
1223: netdev_rx ( netdev, iob );
1224:
1225: /* We have consumed the packet, so clear the receive
1226: notification. */
1227:
1228: dma->receive_completion [priv->receives_done
1229: & MYRI10GE_RECEIVE_COMPLETION_WRAP]
1230: .length = 0;
1231: wmb();
1232:
1233: /* Replace the passed-up I/O buffer. */
1234:
1235: myri10ge_post_receive ( priv, replacement );
1236: ++priv->receives_done;
1237: DBG2_RINGS ( priv );
1238: }
1239: }
1240:
1241: /*
1242: * This transmits a packet.
1243: *
1244: * @v netdev Device to transmit from.
1245: * @v iobuf Data to transmit.
1246: * @ret rc Non-zero if failed to transmit.
1247: *
1248: * This is a iPXE Network Driver API function.
1249: */
1250: static int myri10ge_net_transmit ( struct net_device *netdev,
1251: struct io_buffer *iobuf )
1252: {
1253: mcp_kreq_ether_send_t *kreq;
1254: size_t len;
1255: struct myri10ge_private *priv;
1256: uint32 transmits_posted;
1257:
1258: DBGP ( "myri10ge_net_transmit\n" );
1259: priv = myri10ge_priv ( netdev );
1260:
1261: /* Confirm space in the send ring. */
1262:
1263: transmits_posted = priv->transmits_posted;
1264: if ( transmits_posted - priv->transmits_done
1265: > MYRI10GE_TRANSMIT_WRAP ) {
1266: DBG ( "TX ring full\n" );
1267: return -ENOBUFS;
1268: }
1269:
1270: DBG2 ( "TX %p+%zd ", iobuf->data, iob_len ( iobuf ) );
1271: DBG2_HD ( iobuf->data, 14 );
1272:
1273: /* Record the packet being transmitted, so we can later report
1274: send completion. */
1275:
1276: priv->transmit_iob[transmits_posted & MYRI10GE_TRANSMIT_WRAP] = iobuf;
1277:
1278: /* Copy and pad undersized frames, because the NIC does not pad,
1279: and we would rather copy small frames than do a gather. */
1280:
1281: len = iob_len ( iobuf );
1282: if ( len < ETH_ZLEN ) {
1283: iob_pad ( iobuf, ETH_ZLEN );
1284: len = ETH_ZLEN;
1285: }
1286:
1287: /* Enqueue the packet by writing a descriptor to the NIC.
1288: This is a bit tricky because the HW requires 32-bit writes,
1289: but the structure has smaller fields. */
1290:
1291: kreq = &priv->transmit_ring[transmits_posted
1292: & priv->transmit_ring_wrap];
1293: kreq->addr_high = 0;
1294: kreq->addr_low = htonl ( virt_to_bus ( iobuf->data ) );
1295: ( ( uint32 * ) kreq ) [2] = htonl (
1296: 0x0000 << 16 /* pseudo_header_offset */
1297: | ( len & 0xFFFF ) /* length */
1298: );
1299: wmb();
1300: ( ( uint32 * ) kreq ) [3] = htonl (
1301: 0x00 << 24 /* pad */
1302: | 0x01 << 16 /* rdma_count */
1303: | 0x00 << 8 /* cksum_offset */
1304: | ( MXGEFW_FLAGS_SMALL
1305: | MXGEFW_FLAGS_FIRST
1306: | MXGEFW_FLAGS_NO_TSO ) /* flags */
1307: );
1308: wmb();
1309:
1310: /* Mark the slot as consumed and return. */
1311:
1312: priv->transmits_posted = ++transmits_posted;
1313: DBG2_RINGS ( priv );
1314: return 0;
1315: }
1316:
/*
 * PCI device ID table for this driver; myri10ge_driver.ids points here.
 * NOTE(review): the two hex arguments are presumably the PCI vendor and
 * device IDs, followed by a short name and a description -- confirm
 * against the PCI_ROM() definition.
 */
1317: static struct pci_device_id myri10ge_nics[] = {
1318: /* Each of these macros must be a single line to satisfy a script. */
1319: PCI_ROM ( 0x14c1, 0x0008, "myri10ge", "Myricom 10Gb Ethernet Adapter", 0 ) ,
1320: };
1321:
1322: struct pci_driver myri10ge_driver __pci_driver = {
1323: .ids = myri10ge_nics,
1324: .id_count = ( sizeof ( myri10ge_nics ) / sizeof ( myri10ge_nics[0] ) ) ,
1325: .probe = myri10ge_pci_probe,
1326: .remove = myri10ge_pci_remove
1327: };
1328:
1329: /*
1330: * Local variables:
1331: * c-basic-offset: 8
1332: * c-indent-level: 8
1333: * tab-width: 8
1334: * End:
1335: */
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.