Annotation of XNU/osfmk/mach/flipc_cb.h, revision 1.1.1.1

1.1       root        1: /*
                      2:  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
                      3:  *
                      4:  * @APPLE_LICENSE_HEADER_START@
                      5:  * 
                      6:  * The contents of this file constitute Original Code as defined in and
                      7:  * are subject to the Apple Public Source License Version 1.1 (the
                      8:  * "License").  You may not use this file except in compliance with the
                      9:  * License.  Please obtain a copy of the License at
                     10:  * http://www.apple.com/publicsource and read it before using this file.
                     11:  * 
                     12:  * This Original Code and all software distributed under the License are
                     13:  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
                     14:  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
                     15:  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
                     16:  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
                     17:  * License for the specific language governing rights and limitations
                     18:  * under the License.
                     19:  * 
                     20:  * @APPLE_LICENSE_HEADER_END@
                     21:  */
                     22: /*
                     23:  * @OSF_COPYRIGHT@
                     24:  *
                     25:  */
                     26: /*
                     27:  * HISTORY
                     28:  * 
                     29:  * Revision 1.1.1.1  1998/09/22 21:05:29  wsanchez
                     30:  * Import of Mac OS X kernel (~semeria)
                     31:  *
                     32:  * Revision 1.1.1.1  1998/03/07 02:25:45  wsanchez
                     33:  * Import of OSF Mach kernel (~mburg)
                     34:  *
                     35:  * Revision 1.1.11.1  1996/09/17  16:34:42  bruel
                     36:  *     fixed types.
                     37:  *     [96/09/17            bruel]
                     38:  *
                     39:  * Revision 1.1.6.1  1995/06/13  18:20:10  sjs
                     40:  *     Merged from flipc_shared.
                     41:  *     [95/06/07            sjs]
                     42:  * 
                     43:  * Revision 1.1.3.14  1995/05/19  00:58:14  sjs
                     44:  *     Added send_ready to shared area, used for fast check if there is something
                     45:  *     to do (and prevents the cache from getting stirred).
                     46:  *     [95/05/18            sjs]
                     47:  * 
                     48:  * Revision 1.1.3.13  1995/05/16  20:46:28  randys
                     49:  *     Export performance valid information through performance
                     50:  *     structure rather than kernel configuration section.
                     51:  *     [95/05/16            randys]
                     52:  * 
                     53:  *     Added performance (FLIPC_PERF) config information to
                     54:  *     kernel_configuration section of comm buffer, so that user
                     55:  *     programs can find out if this information is being gathered.
                     56:  *     [95/05/16            randys]
                     57:  * 
                     58:  * Revision 1.1.3.12  1995/05/15  14:26:54  randys
                     59:  *     Updated comments on use of acquire pointer (it's completely
                     60:  *     ignored if dpb is set) and added macros for testing !dpb and
                     61:  *     enabled at the same time.
                     62:  *     [95/05/11            randys]
                     63:  * 
                     64:  *     Change pme_process_ptr ==> sme_process_ptr (since it's being read
                     65:  *     by AIL now).
                     66:  *     [95/05/11            randys]
                     67:  * 
                     68:  *     Added private copies of release and process pointers.
                     69:  *     [95/05/11            randys]
                     70:  * 
                     71:  *     Rearrange endpoint structure to separate data with importantly
                     72:  *     different access patterns into different cache lines.  This
                     73:  *     involved duplicating some (effectively constant) entries, and
                     74:  *     creating two versions of some macros.
                     75:  *     [95/05/11            randys]
                     76:  * 
                     77:  * Revision 1.1.3.11  1995/05/08  16:06:33  randys
                     78:  *     Added comment explaining that an endpoint bufferlist must always
                     79:  *     have valid buffer pointers in all of its entries, to keep
                     80:  *     FLIPC_endpoint_buffer_available from going off the deep end.  No
                     81:  *     code changes.
                     82:  *     [95/04/18            randys]
                     83:  * 
                     84:  * Revision 1.1.3.10  1995/04/05  21:21:52  randys
                     85:  *     Added a field to the buffer control structure holding the
                     86:  *     scheduling policy chosen for the allocations lock.
                     87:  *     [95/04/05            randys]
                     88:  * 
                     89:  * Revision 1.1.3.9  1995/03/23  20:35:19  randys
                     90:  *     Added comments indicating duplication of declarations of
                     91:  *     flipc_cb_base & flipc_cb_size in this file and in flipc_usermsg.h
                     92:  *     Modified declaration of flipc_cb_size to be unsigned long.
                     93:  *     [95/03/21            randys]
                     94:  * 
                     95:  * Revision 1.1.3.8  1995/02/23  21:32:42  randys
                     96:  *     Added space for kernel configuration in communications buffer
                     97:  *     control structure.
                     98:  *     [95/02/22            randys]
                     99:  * 
                    100:  * Revision 1.1.3.7  1995/02/21  17:22:58  randys
                    101:  *     Re-indented code to four space indentation
                    102:  *     [1995/02/21  16:25:32  randys]
                    103:  * 
                    104:  * Revision 1.1.3.6  1995/02/13  22:57:29  randys
                    105:  *     Replaced all of NEXT_{ACQUIRE,RELEASE,PROCESS}_PTR macros with a
                    106:  *      single NEXT_BUFFERLIST_PTR macro.
                    107:  *     [95/02/03            randys]
                    108:  * 
                    109:  * Revision 1.1.3.5  1995/01/26  21:01:44  randys
                    110:  *     Add performance structure into CB.
                    111:  *     [1995/01/24  21:14:31  randys]
                    112:  * 
                    113:  *     Added flag in epgroup structure to note that epgroup
                    114:  *     has a semaphore associated with it.
                    115:  *     [1995/01/19  23:02:13  randys]
                    116:  * 
                    117:  *     Add a space in the comm buffer header for the null_destination
                    118:  *     the ME sets up for the AIL.  Get rid of
                    119:  *     FLIPC_ADDRESS_ENDPOINT_PTR (it isn't used)
                    120:  *     [1995/01/19  20:22:30  randys]
                    121:  * 
                    122:  *     Up the comm buffer size to 1 megabyte
                    123:  *     [1995/01/17  22:23:27  randys]
                    124:  * 
                    125:  * Revision 1.1.3.4  1995/01/12  21:19:01  randys
                    126:  *     Minor commenting changes from dlb
                    127:  *     [1995/01/06  18:18:12  randys]
                    128:  * 
                    129:  * Revision 1.1.3.3  1994/12/22  16:23:57  randys
                    130:  *     Fixed calculation of number of buffers on an endpoint
                    131:  *     to take size of buffer pointers into account.
                    132:  *     [1994/12/21  16:19:55  randys]
                    133:  * 
                    134:  * Revision 1.1.3.2  1994/12/20  19:01:56  randys
                    135:  *     Moved definition of flipc_simple_lock to flipc_cb.h
                    136:  *     [1994/12/20  17:34:41  randys]
                    137:  * 
                    138:  *     Added a simple lock in the comm buffer to use for the
                    139:  *     allocations lock, along with directions as to how
                    140:  *     to use it (not like a normal simple lock).
                    141:  *     [1994/12/20  15:27:25  randys]
                    142:  * 
                    143:  *     Added error log into communications buffer control
                    144:  *     structure, and changed FLIPC_ADDRESS_ENDPOINT_PTR to
                    145:  *     correctly compute the endpoint pointer based on the
                    146:  *     new ctl structure layout.
                    147:  *     [1994/12/19  23:47:45  randys]
                    148:  * 
                    149:  *     Added filename in comment at top of each file
                    150:  *     [1994/12/19  20:28:20  randys]
                    151:  * 
                    152:  *     Add version field to epgroup to check races on buffer acquire
                    153:  *     from epgroup.
                    154:  *     [1994/12/19  18:05:04  randys]
                    155:  * 
                    156:  * Revision 1.1.3.1  1994/12/12  17:46:12  randys
                    157:  *     Putting initial flipc implementation under flipc_shared
                    158:  *     [1994/12/12  16:27:46  randys]
                    159:  * 
                    160:  * Revision 1.1.1.2  1994/12/11  23:11:18  randys
                    161:  *     Initial flipc code checkin
                    162:  * 
                    163:  * $EndLog$
                    164:  */
                    165: 
                    166: /*
                    167:  * mach/flipc_cb.h
                    168:  *
                    169:  * This file is intended to be the data structure layout for the flipc
                    170:  * communications buffer, both for the KKT implementation and
                    171:  * for the eventual paragon implementation.  This file should include
                    172:  * all of the information necessary for either humans or machines to
                    173:  * understand the data structure layout.
                    174:  *
                    175:  * The communications buffer is the wired section of memory used for
                    176:  * communication between the flipc applications interface layer and
                    177:  * the flipc message engine.  No structures in it are visible to the
                    178:  * user; the applications interface layer mediates all user access to
                    179:  * the CB.
                    180:  */
                    181: 
                    182: #ifndef _MACH_FLIPC_CB_H_
                    183: #define _MACH_FLIPC_CB_H_
                    184: 
                    185: #include <mach/flipc_types.h>
                    186: 
                    187: /*
                    188:  * Flipc naming and argument ordering conventions (this applies mainly to
                    189:  * user-interface.h, but seems inappropriate in a user-visible header file):
                    190:  *
                    191:  * All objects prefixed with "flipc"; uppercase for user-visible
                    192:  * objects, lower case for internal ones.
                    193:  *
                    194:  * Types created with typedef will have _t suffixes.
                    195:  *
                    196:  * Words will be separated by '_'.
                    197:  *
                    198:  * Macro definitions will be all in caps.
                    199:  *
                    200:  * Enum members will have their initial letter (after Flipc) capitalized.
                    201:  *
                    202:  *
                    203:  * For user-visible routines:
                    204:  *
                    205:  * The first word following the "flipc" will be the flipc object type that
                    206:  * that routine operates on (specifically "domain", "epgroup",
                    207:  * "endpoint", or "buffer").
                    208:  *
                    209:  * The object named by the first word of the call will, if an argument
                    210:  * to the call, be the first argument.
                    211:  *
                    212:  * Output variables passed as pointers in the arglist will come last.
                    213:  */
                    214: 
                    215: /*
                    216:  * The kinds of objects that exist in the communications buffer are:
                    217:  *
                    218:  * Endpoints -- Used for sending or receiving.
                    219:  * Buffers -- Composed of a buffer header and buffer data.
                    220:  * Endpoint groups -- Used for collecting multiple endpoints
                    221:  *     together for a select-like operation.
                    222:  */
                    223: 
                    224: /*
                    225:  * We can't use general pointers inside the communications buffer,
                    226:  * since the address space on either side of the interface is
                    227:  * different.  The places where we could use pointers are:
                    228:  *
                    229:  *     *) From endpoint sets to endpoints.
                    230:  *     *) From endpoints to buffers.
                    231:  *
                    232:  * The kinds of pointers we could use are:
                    233:  *     *) Byte offset from the beginning of the comm buffer.  This
                    234:  *        is simple, but has the disadvantage of allowing the user to
                    235:  *        play games with pointing endpoint buffer pointers into data
                    236:  *        space, etc.
                    237:  *     *) Rigid arrays of each type of object, with the object
                    238:  *        "pointer" being an index into the array.  This avoids the
                    239:  *        above problem, but complicates memory allocation (forces
                    240:  *        allocation to be contiguous, which may force pre-deciding
                    241:  *        how much space each of the above types will take).
                    242:  *
                    243:  * Though we appear to be going for the rigid allocation for each type
                    244:  * of data structure, I'm still going to do the "simple offset"
                    245:  * solution to maintain maximum flexibility into the future.
                    246:  * The single exception to this is that FLIPC addresses will be composed of
                    247:  * node number and endpoint number, where the endpoint number will be
                    248:  * the index into the endpoint array.
                    249:  */
                    250: 
                    251: typedef unsigned long flipc_cb_ptr;	/* CB "pointer": a byte offset from the beginning of the comm buffer */
                    252: /* Define a null value (-1 converted to flipc_cb_ptr, i.e. all bits set), which doesn't point anywhere into the CB.  */
                    253: #define FLIPC_CBPTR_NULL ((flipc_cb_ptr) -1)
                    254: 
                    255: /*
                    256:  * Synchronization between message engine and application.
                    257:  *
                    258:  * In general, it isn't reasonable to allow locking and unlocking of
                    259:  * data structures between message engine and communications buffer,
                    260:  * as this requires the message engine to trust arbitrary user
                    261:  * threads.  The solution is to arrange all data structures so that
                    262:  * they may be accessed by both parties without locking.  The way that
                    263:  * this is usually done is that specific variables are considered to
                    264:  * be owned by one of the ME or the AIL, and the other party is
                    265:  * allowed to read the variable but not to modify it.  With this
                    266:  * arrangement, implementing things like producer/consumer circular
                    267:  * queues is possible; each agent (ME or AIL) goes around the list
                    268:  * doing its thing, and avoids passing the pointer showing where the
                    269:  * other agent is working.
                    270:  *
                    271:  * Following the above, we may divide structure members into five
                    272:  * classes, and define prefixes for these five classes.
                    273:  *
                    274:  *     Description             Prefix
                    275:  *      -------------------------------
                    276:  *     Private to AIL          pail_
                    277:  *     Private to ME           pme_
                    278:  *     AIL owned, read by ME   sail_
                    279:  *     ME owned, read by AIL   sme_
                    280:  *     Shared in other way     shrd_
                    281:  *
                    282:  * Shared variables may change their ownership based on their own
                    283:  * or someone else's value (these variables may be thought of as
                    284:  * being handed back and forth between the two entities) or on a
                    285:  * configuration option of the structure (not handed back and forth,
                    286:  * but still based on another variable's value).
                    287:  *
                    288:  * In addition, I am going to put variables that are set at endpoint
                    289:  * allocation and cleared at deallocation (but read by both sides) in
                    290:  * a separate class; they are "AIL owned, read by ME" but are
                    291:  * effectively constant over the synchronization protocols we care
                    292:  * about.
                    293:  *
                    294:  *     Constant after allocation       const_
                    295:  *
                    296:  * Note that this ignores memory consistency issues (when the two
                    297:  * agents are actually on two separate processors).  These issues need
                    298:  * to be explored in more detail; for now suffice it to say that the
                    299:  * above methods work given a sequentially consistent memory model or
                    300:  * a processor consistent memory model.
                    301:  *
                    302:  * Also note that an optimizing compiler may reorder our memory
                    303:  * accesses, playing merry hell with the inter-node synchronization
                    304:  * protocols (the compiler doesn't know about the other node, after
                    305:  * all).  To avoid this, all structure members used for
                    306:  * synchronization will be marked volatile; this will force the
                    307:  * compiler to keep the order and number of accesses intact.  This
                    308:  * will also force the compiler *not* to optimize away accesses to
                    309:  * these variables, so it is wise to explicitly load the variable into
                    310:  * a temporary once if you need to do multiple computations with it,
                    311:  * and store it back afterwards when you are done.
                    312:  */
                    313: 
                    314: /*
                    315:  * Memory allocation:
                    316:  *
                    317:  * For maximum simplicity in the first implementation, we need to know
                    318:  * at comm buffer allocation time how many endpoints, endpoint_sets,
                    319:  * and buffers we will want total, until the end of time.  This
                    320:  * massively simplifies memory allocation; there will be a single array
                    321:  * of each type of data and the communication buffer will be taken up
                    322:  * by the concatenation of these arrays (with some fiddling to make
                    323:  * sure that no data crosses a page boundary).
                    324:  *
                    325:  * For each data type there will be a free list that pieces of
                    326:  * data will be added to or removed from as needed.  Each data type
                    327:  * will have a pointer in it to allow it to be linked onto the free
                    328:  * list.
                    329:  */
                    330: 
                    331: /*
                    332:  * Multiple thread access to data structures:
                    333:  *
                    334:  * There are several points in the communications buffer (notably
                    335:  * endpoint accesses) when multiple application threads will be
                    336:  * attempting operations on data structures at the same time.  To
                    337:  * multiplex these operations, we need a per-data structure lock.
                    338:  * Lock attributes:
                    339:  *     *) This lock will not be kernel based (as such a lock would be
                    340:  *        too heavyweight to use for arbitrary sending and receiving
                    341:  *        operations).
                    342:  *     *) Because it is not kernel based, it may not be used to
                    343:  *        multiplex accesses from threads at different kernel
                    344:  *        priority levels.  Deadlock would result if a low-priority
                    345:  *        thread gained the lock and then was preempted by a
                    346:  *        high-priority thread that wanted to acquire it.
                    347:  *     *) Architecture-dependent interfaces need to be designed to
                    348:  *        atomically lock and unlock this data structure.
                    349:  *
                    350:  * These are "simple locks" and are defined in flipc_dep.h.
                    351:  */
                    352: 
                    353: /*
                    354:  * Lock type.  This placement (in flipc_cb.h) is a little bit of a
                    355:  * hack, as it really should be defined with the machine dependent lock
                    356:  * macros.  But then the machine independent lock macros have problems
                    357:  * because they have to include it both before and after the prototypes.
                    358:  * So rather than split the machine dependent stuff into multiple
                    359:  * files, I'll define it here and hope that this definition works for
                    360:  * whatever architectures we're on.
                    361:  */
                    362: typedef unsigned long flipc_simple_lock;	/* storage type for one FLIPC simple lock */
                    363: 
                    364: /*
                    365:  * Ownership of data structures.
                    366:  *
                    367:  * Please note that this is a can of worms, and that I (Randys)
                    368:  * consider this (and its interactions with endpoint group membership)
                    369:  * the likeliest place for design bugs in FLIPC.  Any and all should
                    370:  * take this as an open invitation and challenge to find bugs in what
                    371:  * follows.
                    372:  *
                    373:  * Rules:
                    374:  *
                    375:  *     *) If you've disabled a structure and synched with the
                    376:  *        appropriate side of the ME, the ME won't touch it.
                    377:  *
                    378:  *     *) If you've taken a send endpoint off of the send endpoint
                    379:  *        list and sync'd with the ME, the ME won't touch it.
                    380:  *
                    381:  *[The rest of this applies to the AIL only; the above rules are the
                    382:  * only ones the ME respects.  ]
                    383:  *
                    384:  *     *) Within the AIL, a disabled structure is owned by:
                    385:  *             *) The routine that disabled it, before it is put on
                    386:  *                the free list.
                    387:  *             *) The routine that dequeued it from the free list,
                    388:  *                before it is enabled.
                    389:  *        Taking of the simple lock is not required for ownership in
                    390:  *        these cases.  Taking of the simple lock is not required for
                    391:  *        the act of *enabling* the structure (you have ownership and
                    392:  *        are giving it away), however it is required for the act of
                    393:  *        disabling the structure (since it is the only valid way to
                    394:  *        take ownership of an enabled structure, and you can't
                    395:  *        modify the enabled bit without having ownership).
                    396:  *
                    397:  *     *) The simple lock in a structure always needs to be valid, as
                    398:  *        simple locks may be taken while the structure is in any
                    399:  *        state.  Similarly, the enabled bit must always be valid,
                    400:  *        both because it's what the ME checks, and because it may be
                    401:  *        checked by the AIL while the structure is free.
                    402:  *
                    403:  *     *) Holding the simple lock on an enabled structure imparts
                    404:  *        ownership of that structure.  You are allowed to take the
                    405:  *        simple lock of a disabled structure, but ownership is not
                    406:  *        gained by doing so.
                    407:  *
                    408:  *     *) You are allowed to read the enabled/disabled bit without
                    409:  *        owning the structure (if the structure is disabled, there
                    410:  *        may be no way to gain the ownership).
                    411:  *
                    412:  *     *) Owning a structure allows you to do what you want with it,
                    413:  *        except:
                    414:  *             *) As mentioned above, the simple lock and
                    415:  *                enabled/disabled bit must always be valid.
                    416:  *             *) The ownership of the endpoint group related members
                    417:  *                of an endpoint structure is special; see below.
                    418:  *             *) The allocations lock must be held to manipulate the
                    419:  *                next send endpoint field of any endpoint.
                    420:  *
                    421:  *     *) If an endpoint is on an endpoint group, the ownership of
                    422:  *        the endpoint group related members of the structure
                    423:  *        (sail_endpoint_group and pail_next_eg_endpoint) go with the
                    424:  *        ownership of the endpoint group, not the endpoint.  For
                    425:  *        this purpose only, membership is defined atomically as the
                    426:  *        sail_endpoint_group pointer being set to an endpoint group.
                    427:  *        Thus one may remove an endpoint from an endpoint group
                    428:  *        without owning the endpoint (change the sail_endpoint_group
                    429:  *        pointer last).  One requires both locks to add an endpoint
                    430:  *        to an endpoint group, however.
                    431:  *
                    432:  *        (Part of the motivation for this is that removal and
                    433:  *        addition of endpoints to endpoint groups requires
                    434:  *        modifications of pointers in other endpoint structures).
                    435:  *
                    436:  *     *) No structure may be put on the free list if marked with any
                    437:  *        association to any other structure.  Specifically, endpoint
                    438:  *        groups may have no endpoints belonging to them, and
                    439:  *        endpoints may not belong to an endpoint group or have
                    440:  *        buffers belonging to them.
                    441:  *
                    442:  *     *) One consequence of the above is that endpoint groups may
                    443:  *        not be marked as disabled while they have any endpoints on
                    444:  *        them, as freeing an endpoint requires it to be removed from
                    445:  *        its endpoint group, and if ownership of the endpoint group
                    446:  *        cannot be gained, that is impossible.
                    447:  *
                    448:  *     *) In theory, endpoints *may* be marked disabled while they
                    449:  *        are still on endpoint groups.  In practice, they are not.
                    450:  *        This is relied on by the code which frees endpoint groups,
                    451:  *        in a non-obvious way.  Specifically, that code assumes that
                    452:  *        there is no way that a call to free endpoint will return
                    453:  *        with the endpoint still on the endpoint group.  Since the
                    454:  *        only way for free endpoint to fail is if the endpoint is
                    455:  *        inactive, and since the endpoint is set inactive only after
                    456:  *        free endpoint (presumably a different one) confirms that it
                    457:  *        isn't on any endpoint group, this assumption is true.
                    458:  *
                    459:  *        Got that?  Take home lesson: don't allow endpoints to be
                    460:  *        marked disabled while still on endpoint groups until you
                    461:  *        *do* get that, and are willing to take the responsibility
                    462:  *        of changing it so that it works under your new scheme.
                    463:  *
                    464:  *     *) Ownership of the freelist(s) is gained by holding the
                    465:  *        allocations lock for the buffer, and *only* in that way.
                    466:  *        No modification of freelist, send endpoint list, or send
                    467:  *        side ME sync bits is valid without holding the allocations
                    468:  *        lock.  In other words, while you can read things in the
                    469:  *        main communications buffer control structure at will, you
                    470:  *        may not change them without owning the allocations lock.
                    471:  *
                    472:  *     *) The state where a structure is disabled but off of the
                    473:  *        freelist may be valid as an intermediate (while an AIL
                    474:  *        routine is orchestrating a transition) but is not a valid
                    475:  *        static state.  This state must not survive the return to
                    476:  *        application code of the thread that disabled the structure.
                    477:  */
                    478: 
                    479: /*
                    480:  * Flipc data buffer management.
                    481:  *
                    482:  * A buffer (whether being used for sending or receiving) may be in
                    483:  * one of four states:
                    484:  *
                    485:  * READY -- Buffer held by application.
                    486:  * PROCESSING -- Buffer held by endpoint, unprocessed.  For receive endpoints,
                    487:  *        this means that the buffer is empty, waiting to be filled by
                    488:  *        an incoming message.  For send endpoints, this means that the
                    489:  *        buffer is full, waiting to be sent out.
                    490:  * COMPLETED -- Buffer held by the endpoint, processed.  For receive
                    491:  *        endpoints, this means that the buffer is full, with newly
                    492:  *        received data in it.  For send endpoints, this means that the
                    493:  *        buffer is empty (*), with its data having been sent out.
                    494:  *
                    495:  *        (*) In point of fact the data hasn't been touched, though bits
                    496:  *        may have been fiddled with in the header data structure.  But
                    497:  *        it's been sent.
                    498:  * FREE -- The buffer is in the pool of free buffers, and may be
                    499:  * allocated to any newly created endpoint.
                    500:  *
                    501:  * The transition diagram between these states is relatively simple:
                    502:  *
                    503:  *
                    504:  *                  release
                    505:  *             /-----------------\|
                    506:  * +----------+                  -+----------+
                    507:  * |  READY   |                   |PROCESSING|<- - - - - -
                    508:  * +----------+_                  +----------+           \
                    509:  *      ^     |\ - - - - - - - - /     |    |             \endpoint allocate
                    510:  *      |         (processed)              \endpoint       \
                    511:  *      |                              |     \ free         |
                    512:  *      | acquire                      /      ------\
                    513:  *      |                                           \      |
                    514:  *      |                            / (processed)    >+----------+
                    515:  * +----------+                                               |   FREE   |
                    516:  * |COMPLETED |< - - - - - - - - - -                  +----------+
                    517:  * +----------+                               endpoint allocate    /     ^
                    518:  *     |     ^- - - - - - - - - - - - - - - - - - - - - - -      |
                    519:  *     |                                                        /
                    520:  *     \               endpoint free                           /
                    521:  *      ------------------------------------------------------/
                    522:  *
                    523:  * (If it doesn't look simple, imagine it without the FREE state; that
                    524:  * state doesn't enter into almost any buffer manipulations)
                    525:  *
                    526:  * For send buffers, release==send, acquire==allocate, and
                    527:  * processed==the sending done by the message engine.  For receive buffers,
                    528:  * release==release, acquire==receive, and processed==the actual
                    529:  * arrival of the message handled by the messaging engine.
                    530:  *
                    531:  * The choice of path from the PROCESSING state is an endpoint
                    532:  * specific configuration option; a particular endpoint may leave a
                    533:  * processed buffer on the endpoint, or it may release it back to the
                    534:  * application by dropping it from the endpoint.
                    535:  *
                    536:  * Buffers are assigned the PROCESSING state on a newly allocated
                    537:  * receive endpoint (to be ready to receive messages) and the
                    538:  * COMPLETED state on a newly allocated send endpoint.
                    539:  *
                    540:  * The state (other than FREE) that a particular buffer is in is
                    541:  * determined by its place on a circular queue of buffer pointers that
                    542:  * is part of the endpoint structure.  Buffers owned by the
                    543:  * application (READY) are not pointed to by pointers on this queue.
                    544:  * The buffer is released to the message engine by placement of a
                    545:  * pointer to it on this queue.  When the message engine is done
                    546:  * processing the buffer, it sets a flag in the buffer header.  If the
                    547:  * endpoint is so configured, it then removes the buffer pointer from
                    548:  * the queue; otherwise the AIL acquires the buffer (and removes the
                    549:  * pointer from the queue) when it chooses.
                    550:  *
                    551:  *      . . . . . .
                    552:  *     .           .
                    553:  *    .             .
                    554:  *    .             .       AIL releasing
                    555:  *    .             .       ^
                    556:  *    .         +-------+--/
                    557:  *    .         |       |
                    558:  *    .         |Buffers|
                    559:  *    .         | to be |
                    560:  *    .         |Sent or|
                    561:  *    .         |Receivd|
                    562:  *    .         | Into  |    ^ ME processing
                    563:  *    .         +-------+ --/
                    564:  *    .         |       |
                    565:  *    .  AIL    | Sent  |  (These buffers have a flag set to indicate
                    566:  *    .Acquiring|  or   |   that they have been processed.  This
                    567:  *    .         |Filled |   section is optional; the endpoint may be
                    568:  *    .         |buffers|   configured to drop buffers after processing)
                    569:  *    .     ^   |       |
                    570:  *    .      \--+-------+
                    571:  *    .             .
                    572:  *     .           .
                    573:  *      . . . . . .
                    574:  *
                    575:  *
                    576:  * The AIL will refuse to acquire a buffer that has not yet been
                    577:  * processed by the ME.  Acquire will not work at all on endpoints
                    578:  * that have been configured to drop buffers on completion.
                    579:  *
                    580:  * The buffer_available primitive is coded to avoid doing a
                    581:  * (potentially costly) acquiring of the endpoint flipc lock.  Since
                    582:  * telling whether there is a buffer available requires two operations
                    583:  * (comparison of the acquire and release pointers to see if there are
                    584:  * any buffers on the endpoint, and then indirection of the acquire
                    585:  * pointer to see if that buffer has been processed yet), there is a
                    586:  * potential race that will admit the possibility of indirecting
                    587:  * through an invalid pointer.  For this reason, for the life of an
                    588:  * endpoint, it is a requirement that all buffer pointers on the
                    589:  * bufferlist point *somewhere* (ie. to some existing buffer), so that
                    590:  * this indirection will not cause an access error.  The
                    591:  * buffer_available primitive may return the wrong result, but (as
                    592:  * long as the incorrectness is transitory), this is acceptable.
                    593:  */
                    594: 
                    595: /* Set up the states so that FLIPC_buffer_processed can just do an
                    596:    & and a test: only flipc_Completed and flipc_Ready have bit 0x2 set.  */
                    597: typedef enum {
                    598:     flipc_Free = 0x0, flipc_Processing = 0x1,
                    599:     flipc_Completed = 0x2, flipc_Ready = 0x3
                    600: } flipc_buffer_state_t;
                    601: #define FLIPC_BUFFER_PROCESSED_P(state) ((state) & 0x2)
                    602: 
                    603: /*
                    604:  * Data header/buffer layout.
                    605:  *
                    606:  * For this implementation, and probably for all time, the header
                    607:  * immediately precedes the data in memory, and the messaging engine
                    608:  * will send both header and data.  Our priority is message dispatch
                    609:  * speed rather than raw bandwidth (this is the small message side of
                    610:  * a transfer mechanism), so we don't mind that we are throwing away
                    611:  * some bandwidth by taking up transferred space with header data.
                    612:  *
                    613:  * The data size will be the maximum size allowed by the underlying
                    614:  * transport, minus the header size (available at run time).  The user
                    615:  * will be given a pointer to the data buffer, and will use this both
                    616:  * for copying data in and out, and as an argument to the underlying
                    617:  * flipc routines.  The flipc routines will access appropriately.
                    618:  *
                    619:  * The header structure follows; the user data type will be offset and
                    620:  * cast appropriately to access this.
                    621:  */
                    622: 
                    623: typedef struct flipc_data_buffer {
                    624:     union {	/* The two members share storage.  */
                    625:        FLIPC_address_t destination; /* For sending.  */
                    626:        flipc_cb_ptr free;      /* Link for header free list.  */
                    627:     } u;
                    628: 
                    629:     /* ME owned if flipc_Processing, AIL owned otherwise.  May not ever
                    630:        assume the state flipc_Ready in an optimized implementation.  */
                    631:     volatile flipc_buffer_state_t shrd_state;
                    632: } *flipc_data_buffer_t; /* Note: this names a pointer to the header struct.  */
                    633: 
                    634: /*
                    635:  * Endpoint structure.
                    636:  *
                    637:  * An endpoint is the data structure used for communicating buffers,
                    638:  * either send or receive.  Note that all actual circular lists of
                    639:  * buffer pointers on the endpoints are in their own array that gets
                    640:  * partitioned out to the various endpoints.  This is because we want
                    641:  * the endpoint structures themselves to be fixed size for easy
                    642:  * indexing upon receipt of a message.  This large scale array will be
                    643:  * of size (max_buffers_per_endpoint) * (number_of_endpoints).  Both
                    644:  * of these values are set during the domain initialization call.
                    645:  *
                    646:  * Note that the pointers contained in the buffer lists are pointers to
                    647:  * buffer *headers*, not to the data.
                    648:  */
                    649: 
                    650: /*
                    651:  * This structure is divided into four cache lines, separated by their
                    652:  * usage type:
                    653:  *
                    654:  *     *) Private data that the AIL scribbles on.
                    655:  *     *) Data the AIL writes (regularly) that the ME reads
                    656:  *        (occasionally).  The canonical example is the release pointer.
                    657:  *     *) Private data that the ME scribbles on.
                    658:  *     *) Data the ME writes (regularly) that the AIL reads (occasionally).
                    659:  *        The canonical example is the process pointer.
                    660:  *
                    661:  * There are a couple of other categories of stuff, that can be shoehorned
                    662:  * into the above:
                    663:  *     *) Constant data that both sides read regularly.  This can be
                    664:  *        duplicated in the two private areas (actually, it can be
                    665:  *        duplicated in any two areas that stay in the cache of the
                    666:  *        respective processors).
                    667:  *     *) Stuff that is not accessed on the critical path; it can go
                    668:  *        almost anywhere (probably in one of the two ping-ponging
                    669:  *        cache lines).
                    670:  *     *) Stuff that is read-only for a single processor goes in that
                    671:  *        processor's private data section.
                    672:  *
                    673:  * Duplicate entries have a "p" or a "a" suffixed to the name to
                    674:  * indicate that fact.  Note that these will usually, but not always,
                    675:  * be "const" variables--they may be "const" variables only from the
                    676:  * critical path viewpoint.
                    677:  *
                    678:  * We take cache line length as being 8 * sizeof(int).
                    679:  */
                    680: 
                    681: typedef struct flipc_endpoint {
                    682: 
                    683:     /* ===Private AIL data===  */
                    684:     /* Type of endpoint (send, recv, etc).  Duplicated in private
                    685:        ME section.  */
                    686:     FLIPC_endpoint_type_t constda_type;
                    687: 
                    688:     /* This next value is two variables squeezed into a single word to
                    689:      * save on memory accesses (since they are almost always read at
                    690:      * the same time.  The two variables are:
                    691:      *
                    692:      * const_drop_processed_buffers -- Should the message engine drop
                    693:      * buffers after processing them (as opposed to leaving them on
                    694:      * the endpoint)?
                    695:      *
                    696:      * sail_enabled (volatile) -- Is the endpoint enabled?  This isn't
                    697:      * marked constant because it is used for synchronization on
                    698:      * endpoint deallocation.
                    699:      *
                    700:      * Note that to reduce test and branches, we these two variables
                    701:      * are represented by two bits in the word (bit 0 and bit 16).  It
                    702:      * is illegal to have bits other than 0 and 16 set in this word.
                    703:      * This assumption is used in ENABLED_AND_NOT_DPB_P, and is enforced
                    704:      * in DOE_CONSTRUCT (assumed to not be performance critical) below.
                    705:      *
                    706:      * Duplicated in private ME section.
                    707:      */
                    708: 
                    709:     volatile unsigned long sailda_dpb_or_enabled;
                    710: 
                    711: #define EXTRACT_DPB(dpb_or_enabled) ((dpb_or_enabled) >> 16)
                    712: #define EXTRACT_ENABLED(dpb_or_enabled)  ((dpb_or_enabled) & 0xffff)
                    713: #define DISABLED_OR_DPB_P(dpb_or_enabled) ((dpb_or_enabled) ^ 0x1)
                    714: #define DOE_CONSTRUCT(dpb, enabled) \
                    715:     (((dpb) ? 0x10000 : 0) | ((enabled) ? 0x1 : 0))
                    716: 
                    717:     flipc_simple_lock pail_lock; /* Simple lock for serializing
                    718:                                    multiple thread access to
                    719:                                    structure.  AIL owned.  */
                    720:     /* First element in buffer list array that is ours.  Constant
                    721:        from communications buffer initialization.  */
                    722:     flipc_cb_ptr constda_my_buffer_list;
                    723:     /* First element after my_buffer_list that is *not* in my buffer
                    724:        list.  Constant from communications buffer initialization.  */
                    725:     flipc_cb_ptr constda_next_buffer_list;
                    726: 
                    727:     /* First location that has a valid buffer pointer in it.  This may
                    728:        contain a pointer to a buffer available for acquisition, or it
                    729:        may contain a pointer to a buffer that is still being
                    730:        processed; the buffer header or process_ptr needs to be checked
                    731:        to be sure.  This location is AIL owned.  It is ignored by all
                    732:        (including the ME and initialization code) if
                    733:        drop_processed_buffers, above, is set.  */
                    734:     volatile flipc_cb_ptr shrd_acquire_ptr;
                    735: 
                    736:     /* AIL private copy of process pointer.  This hopefully means that
                    737:        the AIL won't need to read the real process pointer (and fault
                    738:        in a cache line) very often.  */
                    739:     flipc_cb_ptr pail_process_ptr;
                    740: 
                    741:     unsigned int pad_pail_7;
                    742: 
                    743:     /* ===End of cache line===*/
                    744:     /* ===AIL writes, ME occaisionally reads=== */
                    745: 
                    746:     /* Next location at which the AIL may insert a buffer pointer.  */
                    747:     volatile flipc_cb_ptr sail_release_ptr;
                    748:     unsigned int pad_sail_1;
                    749:     unsigned int pad_sail_2;
                    750:     unsigned int pad_sail_3;
                    751:     unsigned int pad_sail_4;
                    752:     unsigned int pad_sail_5;
                    753:     unsigned int pad_sail_6;
                    754:     unsigned int pad_sail_7;
                    755: 
                    756:     /* ===End of cache line===*/
                    757:     /* ===Private ME data=== */
                    758:     /* See above comments (in private ail section).  */
                    759: 
                    760:     FLIPC_endpoint_type_t constdm_type;
                    761:     volatile unsigned long saildm_dpb_or_enabled;
                    762: 
                    763:     volatile unsigned long sme_overruns; /* For a receive endpoint, counter for
                    764:                                     the number of messages that have
                    765:                                     arrived when there hasn't been
                    766:                                     space.  ME owned.   */
                    767:     unsigned long pail_overruns_seen;  /* A count of the number of overruns
                    768:                                   that the AIL has noted and doesn't
                    769:                                   want to be bothered with again.
                    770:                                   The user only sees the difference
                    771:                                   between the previous count and this.  */
                    772: 
                    773:     /*
                    774:      * For send endpoints; linked into a list that is used by the ME
                    775:      * to find stuff to do.  Also used for endpoint free list.
                    776:      * Null if at end of list.  Not "const" because it's used as a
                    777:      * synchronization variable during setup and teardown
                    778:      * of send endpoints.
                    779:      */
                    780:     volatile flipc_cb_ptr sail_next_send_endpoint;
                    781: 
                    782:     /* Constant buffer lsit pointers for ME.  See private ail comments.  */
                    783:     flipc_cb_ptr constdm_my_buffer_list;
                    784:     flipc_cb_ptr constdm_next_buffer_list;
                    785: 
                    786:     /* Private ME copy of release pointer.  This hopefully means that
                    787:        the ME won't have to read (and fault in a cache line) the
                    788:        release pointer very often.  */
                    789: 
                    790:     flipc_cb_ptr pme_release_ptr;
                    791:     /* ===End of cache line===*/
                    792: 
                    793:     /* ===ME writes, AIL occaisionally reads=== */
                    794:     /*
                    795:      * For endpoint group membership.
                    796:      */
                    797:     flipc_cb_ptr pail_next_eg_endpoint; /* Next endpoint in endpoint group.
                    798:                                           AIL owned.  */
                    799:     flipc_cb_ptr sail_epgroup; /* Direct pointer to endpoint group that
                    800:                                   we are part of.  FLIPC_CBPTR_NULL
                    801:                                   if none.  AIL owned.  */
                    802: 
                    803:     /* First location that has a buffer pointer available for
                    804:        processing. If this value is equal to the release_ptr there are no
                    805:        buffers available for processing.  */
                    806:     volatile flipc_cb_ptr sme_process_ptr;
                    807:     unsigned int pad_sme_3;
                    808:     unsigned int pad_sme_4;
                    809:     unsigned int pad_sme_5;
                    810:     unsigned int pad_sme_6;
                    811:     unsigned int pad_sme_7;
                    812: 
                    813:     /* ===End of cache line===*/
                    814:     /* ===END=== */
                    815: 
                    816:     /* The following macros may have possible performance loss in
                    817:        multiple accesses (or indirection, but a good compiler will get
                    818:        around that).  We need to have versions for each processor so
                    819:        that the constant reads are done from the right copy.  */
                    820: 
                    821:     /* General bufferlist pointer increment macro, with versions
                    822:        for ME and AIL.  */
                    823: 
                    824: #define NEXT_BUFFERLIST_PTR(bufferlist_ptr, endpoint, suf)     \
                    825:     (((bufferlist_ptr) + sizeof(flipc_data_buffer_t)           \
                    826:       == ((endpoint)->const ## suf ## _next_buffer_list)) ?    \
                    827:      ((endpoint)->const ## suf ## _my_buffer_list) :           \
                    828:      (bufferlist_ptr) + sizeof(flipc_data_buffer_t))
                    829: #define NEXT_BUFFERLIST_PTR_ME(bufferlist_ptr, endpoint) \
                    830:     NEXT_BUFFERLIST_PTR(bufferlist_ptr, endpoint, dm)
                    831: #define NEXT_BUFFERLIST_PTR_AIL(bufferlist_ptr, endpoint) \
                    832:     NEXT_BUFFERLIST_PTR(bufferlist_ptr, endpoint, da)
                    833: 
                    834:     /* Macros for each of "can I release onto this buffer?"  "Can I
                    835:        acquire from this buffer?" and "Can I process an element on
                    836:        this buffer?"  The first two presume they are being executed on
                    837:        the main procesor, the third on the co-processor.
                    838:        All have three arguments:
                    839:        *) A variable which will be set to the release, acquire, or
                    840:        process pointer after the macro *if* the operation is ok.
                    841:        *) A temporary variable used inside the function.
                    842:        *) The endpoint.
                    843: 
                    844:        We presume the acquire macro won't be called if drop processed
                    845:        buffers is enabled; the process and release macros deal
                    846:        appropriately with that issue.  */
                    847: 
                    848:     /* In general these macros will:
                    849:        *) Not read a volatile structure member more than once.
                    850:        *) If a variables owner is the other processor, these macros
                    851:           will check a local copy of the variable first before checking
                    852:           the other processors.
                    853:        *) Will only update the local copy if the remote copy really is
                    854:           different from the local one.
                    855:          */
                    856: 
                    857: /* This macro implements the synchronization check; local cbptr is
                    858:    the pointer owned by the local processor which we want to compare
                    859:    with a pointer on the remote processor which we have a copy
                    860:    of locally.  Reads the remote pointer zero or one times; other
                    861:    reads are as necessary.  
                    862: 
                    863:    The algorithm is: 
                    864:    *) If the local copy says our pointer and the remote value aren't equal,
                    865:       we're done.
                    866:    *) Otherwise, check the remote copy.  If it says the values aren't
                    867:       equal, update the local copy.  */
                    868: 
                    869: #define ENDPOINT_SYNCNE_CHECK(local_cbptr, copy_rmt_cbptr,     \
                    870:                              rmt_cbptr, tmp_cbptr)             \
                    871:     ((local_cbptr) != (copy_rmt_cbptr)                         \
                    872:      || ((((tmp_cbptr) = (rmt_cbptr)) != (local_cbptr))                \
                    873:         && (((copy_rmt_cbptr) = (tmp_cbptr)), 1)))
                    874: 
                    875: #define ENDPOINT_ACQUIRE_OK(acquire_cbptr, tmp_cbptr, endpoint)                \
                    876:     ((acquire_cbptr) = (endpoint)->shrd_acquire_ptr,                   \
                    877:      ENDPOINT_SYNCNE_CHECK(acquire_cbptr, (endpoint)->pail_process_ptr,        \
                    878:                           (endpoint)->sme_process_ptr, tmp_cbptr))
                    879: 
                    880: #define ENDPOINT_PROCESS_OK(process_cbptr, tmp_cbptr, endpoint)                \
                    881:     ((process_cbptr) = (endpoint)->sme_process_ptr,                    \
                    882:      ENDPOINT_SYNCNE_CHECK(process_cbptr, (endpoint)->pme_release_ptr, \
                    883:                           (endpoint)->sail_release_ptr, tmp_cbptr))
                    884:       
                    885: #define NODPB_ENDPOINT_RELEASE_OK(release_cbptr, tmp_cbptr, endpoint)  \
                    886:     ((release_cbptr) = (endpoint)->sail_release_ptr,                   \
                    887:      (tmp_cbptr) = (endpoint)->shrd_acquire_ptr,                       \
                    888:      (NEXT_BUFFERLIST_PTR_AIL(release_cbptr, endpoint)                 \
                    889:       != (tmp_cbptr)))     
                    890:            
                    891: /* Don't use NEXT_BUFFERLIST_PTR here to save a temporary variable.  */ 
                    892: #define DPB_ENDPOINT_RELEASE_OK(release_cbptr, tmp_cbptr, endpoint)       \
                    893:     (release_cbptr = (endpoint)->sail_release_ptr,                        \
                    894:      ((release_cbptr + sizeof(flipc_data_buffer_t) ==                     \
                    895:        (endpoint)->constda_next_buffer_list)                              \
                    896:       ? ENDPOINT_SYNCNE_CHECK((endpoint)->constda_my_buffer_list,         \
                    897:                              (endpoint)->pail_process_ptr,                \
                    898:                              (endpoint)->sme_process_ptr,                 \
                    899:                              tmp_cbptr)                                   \
                    900:       : ENDPOINT_SYNCNE_CHECK(release_cbptr + sizeof(flipc_data_buffer_t), \
                    901:                              (endpoint)->pail_process_ptr,                \
                    902:                              (endpoint)->sme_process_ptr,                 \
                    903:                              tmp_cbptr)))
                    904: 
                    905:     /* This next is tricky; remember that acquire_ptr points
                    906:        to an actual bufferptr on the list, whereas release_ptr does
                    907:        not.  This macro is only used in FLIPC_endpoint_query, and so
                    908:        doesn't need to have an ME version.  */
                    909: 
                    910: #define BUFFERS_ON_ENDPOINT_AIL(acquire_ptr, release_ptr, endpoint)    \
                    911:     ((release_ptr) > (acquire_ptr)                                     \
                    912:      ? ((release_ptr) - (acquire_ptr)) / sizeof(flipc_cb_ptr)          \
                    913:      : ((((release_ptr) - (endpoint)->constda_my_buffer_list)          \
                    914:         + ((endpoint)->constda_next_buffer_list - acquire_ptr))        \
                    915:        / sizeof(flipc_cb_ptr)))
                    916: } *flipc_endpoint_t;
                    917: 
                    918: 
                    919: /*
                    920:  * Endpoint groups.
                    921:  *
                    922:  * Used to represent a group of endpoints, for linking sending/receiving
                    923:  * with semaphores, etc.  Note that there needs to be a private data
                    924:  * structure kept by the kernel that associates with each epgroup
                    925:  * a semaphore to be used for wakeups on that endpoint set.
                          *
                          * flipc_epgroup_t is a pointer to struct flipc_epgroup, not the
                          * structure itself.  The field-name prefixes (pail_, sail_, pme_,
                          * const_) presumably encode who may read and write each field; their
                          * definition is not in this part of the file -- TODO confirm.
                    926:  */
                    927: 
                    928: typedef struct flipc_epgroup {
                    929:     flipc_simple_lock pail_lock;       /* Lock to synchronize threads (at the
                    930:                                           same priority level) accessing this
                    931:                                           structure.  */
                    932:     volatile unsigned long sail_enabled;       /* Set if structure is active.  */
                    933:     unsigned long const_semaphore_associated; /* Flag to indicate whether or not
                    934:                                          there is a semaphore associated
                    935:                                          with this endpoint group in the
                    936:                                          kernel flipc routines.  */
                    937:     volatile unsigned long sail_wakeup_req; /* Incremented when a thread wants to
                    938:                                        be woken.  */
                    939:     volatile unsigned long pme_wakeup_del; /* Incremented when the ME delivers a
                    940:                                       wakeup.  Presumably compared with
                                                           sail_wakeup_req to tell whether a
                                                           wakeup is still owed -- TODO
                                                           confirm.  */
                    941:     unsigned long pail_version;                /* Incremented when epgroup membership
                    942:                                           is changed; checked when retrieving
                    943:                                           a buffer from an epgroup.  */
                    944:     unsigned long sail_msgs_per_wakeup;        /* How many messages need to arrive
                    945:                                           before the ME delivers a wakeup.  */
                    946:     unsigned long pme_msgs_since_wakeup;       /* How many messages have arrived
                    947:                                           since the last wakeup.  ME
                    948:                                           owned.  */
                    949: 
                    950:     flipc_cb_ptr pail_first_endpoint; /* First endpoint in the group.  The
                    951:                                         other endpoints are linked along
                    952:                                         behind it.  AIL owned.  */
                    953:     flipc_cb_ptr pail_free;    /* Used to link this epgroup onto
                    954:                                   the freelist.  (This comment used to
                                                   say "endpoint"; presumably a
                                                   copy-paste slip -- confirm.)  */
                    955: } *flipc_epgroup_t;
                    956: 
                    957: /*
                    958:  * Communication buffer control structure.
                    959:  *
                    960:  * This is in the communications buffer itself.  Note that any changes
                    961:  * in this structure require it to be locked with the allocation lock,
                    962:  * as access to this structure is shared by all threads using the CB.
                    963:  */
                    964: 
                    965: /*
                    966:  * Individual data type layout.
                    967:  *
                    968:  * All we need here is a pointer to the start of each type of data
                    969:  * struct, the number of those data structures in the communications
                    970:  * buffer, and a pointer to the beginning of the freelist for that data
                    971:  * structure.
                    972:  *
                    973:  * Note that the composite buffer list doesn't have a freelist associated
                    974:  * with it, since each section of the buffer list is tightly bound to an
                    975:  * endpoint, and is allocated and freed with that endpoint.  We still
                    976:  * need the start and number information, though.
                    977:  */
                    978: struct flipc_cb_type_ctl {
                    979:     flipc_cb_ptr start;                /* Where the array of this type of
                    980:                                   data structure starts.  */
                    981:     unsigned long number;              /* How many of them we've got.  */
                    982:     flipc_cb_ptr free;         /* Where the beginning of the freelist
                    983:                                   is.  */
                    984: };
                    985: 
                    986: /*
                    987:  * Synchronization with message engine.
                    988:  *
                    989:  * At certain times (specifically during structure allocation/free or
                    990:  * additions to the send list) you want to know that the messaging
                    991:  * engine has picked up your changes.  However, the message engine has
                    992:  * (effectively) two threads, one for each of the send and receive
                    993:  * sides.  The mechanisms used for synchronizations with the two sides
                    994:  * differ.  In an eventual co-processor implementation (with a single
                    995:  * thread), only the send side mechanism will be used.
                    996:  *
                    997:  * To request a cached state flush by the send side of the messaging
                    998:  * engine, you flip the request_sync bit and it responds by flipping
                    999:  * the response_sync bit.  The send ME checks this bit once every trip
                   1000:  * through the send endpoints.
                   1001:  *
                   1002:  * On the receive side, since receives take very little time and do
                   1003:  * not block (unlike sends) when we want to make sure the ME is
                   1004:  * holding no cached receive side state, we simply spin until we see
                   1005:  * that the ME receive side is no longer operating.  It sets a
                   1006:  * variable whenever it is in the process of receiving a message.
                   1007:  */
                   1008: 
                   1009: /*
                   1010:  * Proper manipulation of the send endpoint list.
                   1011:  *
                   1012:  * Note that synchronizing with the message engine over access to the
                   1013:  * send endpoint list is especially tricky.  There is no problem with
                   1014:  * writing new values in all of the locations required to take a send
                   1015:  * endpoint off of the list.  However, we must be very sure before
                   1016:  * modifying the pointer *in* the send endpoint that the ME isn't
                   1017:  * currently working in that send endpoint (else it could be sent off
                   1018:  * into the void).  Two options here:
                   1019:  *
                   1020:  *     *) Synchronize (using the below variables) for each send
                   1021:  *        endpoint removed, after the removal but before the
                   1022:  *        modification of the data in the internal structure.
                   1023:  *     *) If we can always be sure that the send endpoint link in the
                   1024:  *        endpoint structure has a valid value, we can simply let the
                   1025:  *        chips fall where they may.  It will be null while free, and
                   1026:  *        have a value that points back into the send buffer list
                   1027:  *        when reallocated.  I'm not going to do this; it's sleazy
                   1028:  *        and will partially mess up fairness based on ME send
                   1029:  *        endpoint round-robinning.
                   1030:  */
                   1031: 
                   1032: /*
                   1033:  * This entire structure is protected by a kernel level lock so there
                   1034:  * is no conflict between threads accessing it.  See flipc_kfr.c for
                   1035:  * details on this lock; how it is implemented and used depends on what
                   1036:  * kernel base we are on.
                   1037:  */
                   1038: 
                   1039: /*
                   1040:  * Note that the last element of this structure is variable sized, so this
                   1041:  * structure itself is also variable sized.
                          *
                          * flipc_comm_buffer_ctl_t is a pointer to struct flipc_comm_buffer_ctl,
                          * not the structure itself.  The last member declared here is
                          * sme_error_log; its type (struct FLIPC_domain_errors) is declared
                          * elsewhere, so the variable-size claim cannot be checked from this
                          * declaration alone.
                   1042:  */
                   1043: typedef struct flipc_comm_buffer_ctl {
                   1044:     /* Kernel flipc configuration that the user must match in order to
                   1045:        work with this kernel.  Checked as soon as the comm buffer is
                   1046:        mapped.  */
                   1047:     struct {
                   1048:        unsigned int real_time_primitives:1;
                   1049:        unsigned int message_engine_in_kernel:1;
                   1050:        unsigned int no_bus_locking:1; /* One way check -- if the kernel doesn't
                   1051:                                   have this and the user does, that's
                   1052:                                   an error.  */
                   1053:     } kernel_configuration;
                   1054:     volatile unsigned long     send_ready;     /* One or more sends are ready to go.  */
                   1055: 
                   1056:     /* These next three fields are constant after communications buffer
                   1057:        initialization.  */
                   1058:     unsigned long data_buffer_size; /* Size of the data buffers.  */
                   1059:     unsigned long local_node_address; /* Local node number.  */
                   1060:     FLIPC_address_t null_destination; /* Local null destination value.  */
                   1061: 
                   1062: #if REAL_TIME_PRIMITIVES
                   1063:     /* The scheduling policy used by the task initializing flipc for
                   1064:        the allocations lock.
                                NOTE(review): the member declared at this position depends on
                                REAL_TIME_PRIMITIVES, so kernel and user code must be compiled
                                with the same setting to agree on the layout; the
                                real_time_primitives bit above presumably exists to check
                                that -- confirm.  */
                   1065:     int allocations_lock_policy;
                   1066: #else
                   1067:     /* A poor substitute for a kernel level allocations lock.
                   1068:        Note that this *cannot* be used as a regular simple lock;
                   1069:        instead, try to acquire it, call sleep(1), try again, etc.
                   1070:        Spinning on this lock will probably waste lots of cycles.  */
                   1071:     flipc_simple_lock pail_alloc_lock;
                   1072: #endif
                   1073: 
                   1074:     /* All of the members of these structures except for the free pointer
                   1075:        are constant after initialization.  The free pointer is ail owned
                   1076:        and private.  */
                   1077:     struct flipc_cb_type_ctl endpoint;
                   1078:     struct flipc_cb_type_ctl epgroup;
                   1079:     struct flipc_cb_type_ctl bufferlist;
                   1080:     struct flipc_cb_type_ctl data_buffer;
                   1081: 
                   1082:     /* Global synchronization with the message engine.  On the KKT
                   1083:        implementation we need one synchronizer for each thread.  */
                   1084: 
                   1085:     /* Send side: */
                   1086:     volatile unsigned long sail_request_sync; /* request_sync = !request_sync when the
                   1087:                                          AIL wants to synchronize with the
                   1088:                                          CB.  */
                   1089:     volatile unsigned long sme_respond_sync; /* respond_sync = !respond_sync when
                   1090:                                         the ME has noticed the sync
                   1091:                                         request.  By responding to the
                   1092:                                         sync, the ME is stating that it has
                   1093:                                         no communications buffer state that
                   1094:                                         was cached prior to its noticing
                   1095:                                         the sync.  */
                   1096: 
                   1097:     /* Receive side.  */
                   1098:     volatile unsigned long sme_receive_in_progress; /* Set by the ME before it looks at
                   1099:                                                any data structures; cleared
                   1100:                                                afterwards.  A simple spin in
                   1101:                                                the user space on this
                   1102:                                                variable will suffice, as the
                   1103:                                                time that the message
                   1104:                                                engine could be receiving
                   1105:                                                is low.  */
                   1106: 
                   1107:     /* Send endpoint list starts here.  */
                   1108:     volatile flipc_cb_ptr sail_send_endpoint_list; /* Null if no send endpoints.
                   1109:                                                    */
                   1110: 
                   1111:     /* Keep track of whatever performance information we choose.  */
                   1112:     struct FLIPC_domain_performance_info performance;
                   1113: 
                   1114:     /* Keep track of various kinds of error information here.  */
                   1115:     struct FLIPC_domain_errors sme_error_log;
                   1116: 
                   1117: } *flipc_comm_buffer_ctl_t;
                   1118: 
                   1119: 
                   1120: /*
                   1121:  * The communications buffer.
                   1122:  *
                   1123:  * The only restriction on the layout of the communications buffer is
                   1124:  * that the buffers themselves may not cross page boundaries.  So we
                   1125:  * will place the data buffers at the end of the communications
                   1126:  * buffer, and the other objects at the beginning, and there may be a
                   1127:  * little bit of extra space in the middle.
                   1128:  *
                   1129:  * Note that this layout may change in future versions of FLIPC.
                   1130:  *
                   1131:  *     +---------------------------+
                   1132:  *     |    flipc_comm_buffer_ctl  |
                   1133:  *     +---------------------------+
                   1134:  *     |                           |
                   1135:  *     |         Endpoints         |
                   1136:  *     |                           |
                   1137:  *     +---------------------------+
                   1138:  *     |                           |
                   1139:  *     |      Endpoint Groups      |
                   1140:  *     |                           |
                   1141:  *     +---------------------------+
                   1142:  *     |                           |
                   1143:  *     | Combined Buffer Lists     |
                   1144:  *     |                           |
                   1145:  *     +---------------------------+
                   1146:  *     |                           |
                   1147:  *     | (Possible empty space)    |
                   1148:  *     |                           |
                   1149:  *     +---------------------------+
                   1150:  *     |                           |
                   1151:  *     |    Data Buffers           |
                   1152:  *     |                           |
                   1153:  *     +---------------------------+
                   1154:  */
                   1155: 
                   1156: /* The number of pages that the kernel will reserve for the comm
                   1157:    buffer.  The AIL needs to know this to know how much to map.
                            NOTE(review): 0x100000 reads more like a byte count (1 MiB) than
                            a page count -- confirm the unit against the code that maps the
                            buffer.  */
                   1158: #define COMM_BUFFER_SIZE 0x100000
                   1159: 
                   1160: /*
                   1161:  * These variables are set, in a per-address space context, to the base
                   1162:  * and length of the communications buffer.  The ME needs to do bounds
                   1163:  * checking to make sure it isn't overrunning anything.  Note that the
                   1164:  * existence of these variables implies that an application will only
                   1165:  * open a single domain.
                   1166:  *
                   1167:  * These declarations are duplicated in flipc/flipc_usermsg.h, and
                   1168:  * should be kept in sync with that file.
                          *
                          * NOTE(review): neither variable is declared `extern', so each
                          * source file that includes this header gets its own tentative
                          * definition.  Whether those merge at link time depends on the
                          * toolchain's handling of common symbols -- confirm this is intended.
                   1169:  */
                   1170: unsigned char *flipc_cb_base;
                   1171: unsigned long flipc_cb_length;         /* In bytes.  */
                   1172: 
                   1173: /*
                   1174:  * Following is a set of macros to convert back and forth between
                   1175:  * real address pointers and flipc_cb_ptr's for each data type.  They
                   1176:  * rely on the flipc_cb_base being set correctly.
                   1177:  *
                   1178:  * A possible future improvement might be to have bounds checking occur
                   1179:  * inside these macros, but I'm not sure what I'd do if it failed.
                          * As written, none of them compares its argument against
                          * flipc_cb_length.
                   1180:  */
                   1181: 
                   1182: /* Real pointer to comm buffer offset: the byte distance of ptr from
                            flipc_cb_base.  */
                   1183: #define FLIPC_CBPTR(ptr) \
                   1184: (((unsigned char *) (ptr)) - flipc_cb_base)
                   1185: 
                   1186: /* Offset back to a real pointer: add flipc_cb_base to cb_ptr and cast the
                            result to the pointer type for the kind of object wanted.  */
                   1187: #define FLIPC_ENDPOINT_PTR(cb_ptr) \
                   1188: ((flipc_endpoint_t) ((cb_ptr) + flipc_cb_base))
                   1189: #define FLIPC_EPGROUP_PTR(cb_ptr) \
                   1190: ((flipc_epgroup_t) ((cb_ptr) + flipc_cb_base))
                   1191: #define FLIPC_DATA_BUFFER_PTR(cb_ptr) \
                   1192: ((flipc_data_buffer_t) ((cb_ptr) + flipc_cb_base))
                   1193: #define FLIPC_BUFFERLIST_PTR(cb_ptr) \
                   1194: ((flipc_cb_ptr *) ((cb_ptr) + flipc_cb_base))
                   1195: 
                   1196: 
                   1197: /*
                   1198:  * Flipc addresses.
                   1199:  *
                   1200:  * The addresses used by flipc for communication are defined in the
                   1201:  * user visible header file as unsigned longs.  These macros pull that
                   1202:  * information apart for use of the FLIPC internal routines.
                   1203:  *
                   1204:  * I assume in the following that endpoints immediately follow the
                   1205:  * comm buffer control structure, because that makes indexing into
                   1206:  * them much easier.
                          *
                          * Bit layout used by the macros: the node number sits in bit 16 and
                          * above, the endpoint index in the low 16 bits (mask 0xffff).
                          *
                          * NOTE(review): FLIPC_CREATE_ADDRESS neither parenthesizes nor casts
                          * `node', and does not mask endpoint_idx to 16 bits, so an expression
                          * argument such as `a | b' or an index above 0xffff would produce a
                          * wrong address.  Callers presumably pass simple in-range values --
                          * confirm.
                   1207:  */
                   1208: 
                   1209: #define FLIPC_CREATE_ADDRESS(node, endpoint_idx) \
                   1210: ((node << 16) | (endpoint_idx))
                   1211: #define FLIPC_ADDRESS_NODE(addr) (((unsigned long) (addr)) >> 16)
                   1212: #define FLIPC_ADDRESS_ENDPOINT(addr) (((unsigned long) (addr)) & 0xffff)
                   1213: 
                   1214: #endif /* _MACH_FLIPC_CB_H_ */

unix.superglobalmegacorp.com

This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.