Annotation of linux/lib/malloc.c, revision 1.1.1.3

/*
 * malloc.c --- a general purpose kernel memory allocator for Linux.
 *
 * Written by Theodore Ts'o (tytso@mit.edu), 11/29/91
 *
 * This routine is written to be as fast as possible, so that it
 * can be called from the interrupt level.
 *
 * Limitations: the maximum size of memory we can allocate using this
 *	routine is 4k, the size of a page in Linux.
 *
 * The general game plan is that each page (called a bucket) will only hold
 * objects of a given size.  When all of the objects on a page are released,
 * the page can be returned to the general free pool.  When malloc() is
 * called, it looks for the smallest bucket size which will fulfill its
 * request, and allocates a piece of memory from that bucket pool.
 *
 * Each bucket has as its control block a bucket descriptor which keeps
 * track of how many objects are in use on that page, and the free list
 * for that page.  Like the buckets themselves, bucket descriptors are
 * stored on pages requested from get_free_page().  However, unlike buckets,
 * pages devoted to bucket descriptors are never released back to the
 * system.  Fortunately, a system should probably only need 1 or 2 bucket
 * descriptor pages, since a page can hold 256 bucket descriptors (which
 * corresponds to 1 megabyte worth of bucket pages).  If the kernel is using
 * that much allocated memory, it's probably doing something wrong.  :-)
 *
 * Note: malloc() and free() both call get_free_page() and free_page()
 *	in sections of code where interrupts are turned off, to allow
 *	malloc() and free() to be safely called from an interrupt routine.
 *	(We will probably need this functionality when networking code,
 *	particularly things like NFS, is added to Linux.)  However, this
 *	presumes that get_free_page() and free_page() are interrupt-level
 *	safe, which they may not be once paging is added.  If this is the
 *	case, we will need to modify malloc() to keep a few unused pages
 *	"pre-allocated" so that it can safely draw upon those pages if
 *	it is called from an interrupt routine.
 *
 *	Another concern is that get_free_page() should not sleep; if it
 *	does, the code is carefully ordered so as to avoid any race
 *	conditions.  The catch is that if malloc() is called re-entrantly,
 *	there is a chance that unnecessary pages will be grabbed from the
 *	system.  Except for the pages used for bucket descriptors, the
 *	extra pages will eventually get released back to the system, though,
 *	so it isn't all that bad.
 */

/* I'm going to modify it to keep some free pages around.  get_free_page()
   can sleep, and tcp/ip needs to call malloc() at interrupt time (or keep
   big buffers around for itself).  I guess I'll have the return-from-syscall
   path fill up the free page descriptors.  -RAB */

/* Since the advent of GFP_ATOMIC, I've changed the malloc code to
   use it and return NULL if it can't get a page.  -RAB */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <asm/system.h>

struct bucket_desc {	/* 16 bytes */
	void			*page;
	struct bucket_desc	*next;
	void			*freeptr;
	unsigned short		refcnt;
	unsigned short		bucket_size;
};

struct _bucket_dir {	/* 8 bytes */
	int			size;
	struct bucket_desc	*chain;
};
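
/*
 * Illustrative sketch, not part of the original allocator: with the 16-byte
 * descriptor above and Linux's 4k pages, one descriptor page holds
 * PAGE_SIZE/sizeof(struct bucket_desc) == 256 descriptors, which is enough
 * to describe 256 * 4k == 1 megabyte of bucket pages, as the header comment
 * claims.  The helper below only restates that arithmetic; its name is
 * hypothetical.
 */
static inline unsigned int example_bucket_descs_per_page(void)
{
	return PAGE_SIZE / sizeof(struct bucket_desc);	/* 4096 / 16 == 256 */
}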

/*
 * The following is where we store a pointer to the first bucket
 * descriptor for a given size.
 *
 * If it turns out that the Linux kernel allocates a lot of objects of a
 * specific size, then we may want to add that specific size to this list,
 * since that will allow the memory to be allocated more efficiently.
 * However, since an entire page must be dedicated to each specific size
 * on this list, some amount of temperance must be exercised here.
 *
 * Note that this list *must* be kept in order.
 */
struct _bucket_dir bucket_dir[] = {
	{ 16,	(struct bucket_desc *) 0},
	{ 32,	(struct bucket_desc *) 0},
	{ 64,	(struct bucket_desc *) 0},
	{ 128,	(struct bucket_desc *) 0},
	{ 256,	(struct bucket_desc *) 0},
	{ 512,	(struct bucket_desc *) 0},
	{ 1024,	(struct bucket_desc *) 0},
	{ 2048,	(struct bucket_desc *) 0},
	{ 4096,	(struct bucket_desc *) 0},
	{ 0,	(struct bucket_desc *) 0}};	/* End of list marker */
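
/*
 * Illustrative sketch, not part of the original allocator: how the table
 * above maps a request to a bucket.  malloc() below simply walks
 * bucket_dir[] until it finds the first entry whose size is >= len, so a
 * 100-byte request lands in the 128-byte bucket, and anything over 4096
 * falls off the end of the table and is rejected.  The helper name is
 * hypothetical; it exists only to make the lookup rule concrete.
 */
static inline int example_bucket_size_for(unsigned int len)
{
	struct _bucket_dir *bdir;

	for (bdir = bucket_dir; bdir->size; bdir++)
		if (bdir->size >= len)
			return bdir->size;
	return 0;	/* too large for malloc() */
}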

/*
 * This contains a linked list of free bucket descriptor blocks
 */
static struct bucket_desc *free_bucket_desc = (struct bucket_desc *) 0;

/*
 * This routine initializes a bucket descriptor page.
 */
static inline int init_bucket_desc(void)
{
	struct bucket_desc *bdesc, *first;
	int i;

	/* This turns interrupts on, so we should be careful. */
	first = bdesc = (struct bucket_desc *) get_free_page(GFP_ATOMIC);
	if (!bdesc)
		return 1;
	/* Thread all but the last descriptor on the page into a free list. */
	for (i = PAGE_SIZE/sizeof(struct bucket_desc); i > 1; i--) {
		bdesc->next = bdesc+1;
		bdesc++;
	}
	/*
	 * This is done last, to avoid race conditions in case
	 * get_free_page() sleeps and this routine gets called again....
	 * (get_free_page() will not sleep here because of GFP_ATOMIC.)
	 */
	bdesc->next = free_bucket_desc;
	free_bucket_desc = first;
	return 0;
}
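
/*
 * Illustrative sketch, not part of the original allocator: counting the
 * descriptors currently sitting on the free list that init_bucket_desc()
 * refills.  The function name is hypothetical, and a real caller would do
 * this with interrupts off (cli/sti), as malloc() and free_s() do, since
 * the list is also manipulated from interrupt level.
 */
static inline int example_count_free_bucket_descs(void)
{
	struct bucket_desc *bdesc;
	int count = 0;

	for (bdesc = free_bucket_desc; bdesc; bdesc = bdesc->next)
		count++;
	return count;
}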

void *malloc(unsigned int len)
{
	struct _bucket_dir	*bdir;
	struct bucket_desc	*bdesc;
	void			*retval;

	/*
	 * First we search the bucket_dir to find the right bucket size
	 * for this request.
	 */
	for (bdir = bucket_dir; bdir->size; bdir++)
		if (bdir->size >= len)
			break;

	if (!bdir->size) {
		printk("malloc called with impossibly large argument (%u)\n", len);
		return NULL;
	}
	/*
	 * Now we search for a bucket descriptor which has free space
	 */
	cli();	/* Avoid race conditions */
	for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next)
		if (bdesc->freeptr)
			break;
	/*
	 * If we didn't find a bucket with free space, then we'll
	 * allocate a new one.
	 */
	if (!bdesc) {
		char *cp;
		int i;

		if (!free_bucket_desc)
			if (init_bucket_desc()) {
				sti();
				return NULL;
			}
		bdesc = free_bucket_desc;
		free_bucket_desc = bdesc->next;
		bdesc->refcnt = 0;
		bdesc->bucket_size = bdir->size;
		cp = (char *) get_free_page(GFP_ATOMIC);
		if (!cp) {
			/* Give the descriptor back to the free list before bailing out. */
			bdesc->next = free_bucket_desc;
			free_bucket_desc = bdesc;
			sti();
			return NULL;
		}
		bdesc->page = bdesc->freeptr = (void *) cp;
		/* Set up the chain of free objects */
		for (i = PAGE_SIZE/bdir->size; i > 1; i--) {
			*((char **) cp) = cp + bdir->size;
			cp += bdir->size;
		}
		*((char **) cp) = 0;
		bdesc->next = bdir->chain;	/* OK, link it in! */
		bdir->chain = bdesc;
	}
	retval = (void *) bdesc->freeptr;
	bdesc->freeptr = *((void **) retval);
	bdesc->refcnt++;
	sti();	/* OK, we're safe again */
	return retval;
}
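
/*
 * Illustrative sketch, not part of the original allocator: the "chain of
 * free objects" that malloc() sets up above is a singly linked list
 * threaded through the first word of every free object on the bucket page,
 * with freeptr as its head.  Walking it, as below, shows how many objects
 * in that bucket are still free.  The function name is hypothetical, and
 * the caller would need interrupts disabled, as malloc() and free_s() do.
 */
static inline int example_count_free_objects(struct bucket_desc *bdesc)
{
	void *p;
	int count = 0;

	for (p = bdesc->freeptr; p; p = *((void **) p))
		count++;
	return count;
}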

/*
 * Here is the free routine.  If you know the size of the object that you
 * are freeing, then free_s() will use that information to speed up the
 * search for the bucket descriptor.
 *
 * We will #define a macro so that "free(x)" becomes "free_s(x, 0)"; a
 * sketch of such a macro follows this comment.
 */
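
/*
 * A minimal sketch of the macro described above, shown here only for
 * illustration; in the kernel such a definition would live in a header
 * shared with callers rather than in this file.
 */
#ifndef free
#define free(x) free_s((x), 0)
#endif
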
void free_s(void *obj, int size)
{
	void			*page;
	struct _bucket_dir	*bdir;
	struct bucket_desc	*bdesc, *prev;

	/* Calculate what page this object lives in */
	page = (void *) ((unsigned long) obj & ~(PAGE_SIZE-1));
	/* Now search the buckets looking for that page */
	for (bdir = bucket_dir; bdir->size; bdir++) {
		prev = 0;
		/* If size is zero then this conditional is always false */
		if (bdir->size < size)
			continue;
		for (bdesc = bdir->chain; bdesc; bdesc = bdesc->next) {
			if (bdesc->page == page)
				goto found;
			prev = bdesc;
		}
	}
	printk("Bad address passed to kernel free_s()\n");
	return;
found:
	cli();	/* To avoid race conditions */
	*((void **) obj) = bdesc->freeptr;
	bdesc->freeptr = obj;
	bdesc->refcnt--;
	if (bdesc->refcnt == 0) {
		/*
		 * We need to make sure that prev is still accurate.  It
		 * may not be, if someone rudely interrupted us....
		 */
		if ((prev && (prev->next != bdesc)) ||
		    (!prev && (bdir->chain != bdesc)))
			for (prev = bdir->chain; prev; prev = prev->next)
				if (prev->next == bdesc)
					break;
		if (prev)
			prev->next = bdesc->next;
		else {
			if (bdir->chain != bdesc)
				panic("malloc bucket chains corrupted");
			bdir->chain = bdesc->next;
		}
		free_page((unsigned long) bdesc->page);
		bdesc->next = free_bucket_desc;
		free_bucket_desc = bdesc;
	}
	sti();
	return;
}
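
/*
 * Illustrative usage sketch, not part of the original file: how a caller
 * might use malloc()/free_s().  Since malloc() allocates with GFP_ATOMIC,
 * it can return NULL, so the result must be checked.  The structure and
 * function names here are hypothetical.
 */
struct example_buf {
	struct example_buf	*next;
	char			data[124];
};

static inline struct example_buf *example_alloc_buf(void)
{
	struct example_buf *buf;

	/* sizeof(struct example_buf) is 128, so this comes from the 128-byte bucket. */
	buf = (struct example_buf *) malloc(sizeof(struct example_buf));
	if (!buf)
		return NULL;	/* no page available at interrupt time */
	buf->next = NULL;
	return buf;
}

static inline void example_free_buf(struct example_buf *buf)
{
	/* Passing the size lets free_s() skip the smaller buckets. */
	free_s(buf, sizeof(struct example_buf));
}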
