qemu/roms/qemu-palcode/memcpy.c (revision 1.1.1.1)

/*
 *  linux/arch/alpha/lib/memcpy.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 */

/*
 * This is a reasonably optimized memcpy() routine.
 */

#include <string.h>

/*
 * Note that the C code is written to be optimized into good assembly. However,
 * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
 * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
 * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
 */

/*
 * This should be done in one go with ldq_u*2/mask/stq_u. Do it
 * with a macro so that we can fix it up later..
 */
#define ALIGN_DEST_TO8_UP(d,s,n) \
        while (d & 7) { \
                if (n <= 0) return; \
                n--; \
                *(char *) d = *(char *) s; \
                d++; s++; \
        }
#define ALIGN_DEST_TO8_DN(d,s,n) \
        while (d & 7) { \
                if (n <= 0) return; \
                n--; \
                d--; s--; \
                *(char *) d = *(char *) s; \
        }
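
/*
 * For example, if d ends in ...5 then ALIGN_DEST_TO8_UP copies the three
 * bytes at offsets 5, 6 and 7 (reducing n by 3) and leaves d on a multiple
 * of 8; at most seven single-byte copies are ever needed.  ALIGN_DEST_TO8_DN
 * does the same while walking downward from the end of the buffer.
 */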

/*
 * This should similarly be done with ldq_u*2/mask/stq. The destination
 * is aligned, but we don't fill in a full quad-word
 */
#define DO_REST_UP(d,s,n) \
        while (n > 0) { \
                n--; \
                *(char *) d = *(char *) s; \
                d++; s++; \
        }
#define DO_REST_DN(d,s,n) \
        while (n > 0) { \
                n--; \
                d--; s--; \
                *(char *) d = *(char *) s; \
        }

/*
 * This should be done with ldq/mask/stq. The source and destination are
 * aligned, but we don't fill in a full quad-word
 */
#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)

/*
 * This does unaligned memory copies. We want to avoid storing to
 * an unaligned address, as that would do a read-modify-write cycle.
 * We also want to avoid reading each source quadword more than once.
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
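/*
 * A worked example of the combine below, with s & 7 == 3: the two ldq_u
 * loads fetch the aligned quadwords covering s and s+8.  extql shifts the
 * first one down so that the five bytes at s..s+4 land in the low byte
 * lanes, extqh shifts the second one up so that the three bytes at
 * s+5..s+7 land in the high byte lanes, and the OR of the two is the
 * quadword that starts at the unaligned address s, ready for one aligned
 * store to d.
 */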
static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
                                          long n)
{
        ALIGN_DEST_TO8_UP(d,s,n);
        n -= 8;                 /* to avoid compare against 8 in the loop */
        if (n >= 0) {
                unsigned long low_word, high_word;
                __asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s));
                do {
                        unsigned long tmp;
                        __asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8)));
                        n -= 8;
                        __asm__("extql %1,%2,%0"
                                :"=r" (low_word)
                                :"r" (low_word), "r" (s));
                        __asm__("extqh %1,%2,%0"
                                :"=r" (tmp)
                                :"r" (high_word), "r" (s));
                        s += 8;
                        *(unsigned long *) d = low_word | tmp;
                        d += 8;
                        low_word = high_word;
                } while (n >= 0);
        }
        n += 8;
        DO_REST_UP(d,s,n);
}
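
/*
 * The "n -= 8" before each main loop means the loop test "n >= 0" really
 * asks "are at least 8 bytes left?"; the matching "n += 8" afterwards
 * restores the 0..7 byte remainder so the DO_REST_* macros can finish
 * the tail.
 */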

static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
                                          long n)
{
        /* I don't understand AXP assembler well enough for this. -Tim */
        s += n;
        d += n;
        while (n--)
                * (char *) --d = * (char *) --s;
}

/*
 * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
 * for the load-store. I don't know why, but it would seem that using a floating
 * point register for the move slows things down (very small difference,
 * though).
 *
 * Note the ordering to try to avoid load (and address generation) latencies.
 */
static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
                                        long n)
{
        ALIGN_DEST_TO8_UP(d,s,n);
        n -= 8;
        while (n >= 0) {
                unsigned long tmp;
                __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
                n -= 8;
                s += 8;
                *(unsigned long *) d = tmp;
                d += 8;
        }
        n += 8;
        DO_REST_ALIGNED_UP(d,s,n);
}
static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
                                        long n)
{
        s += n;
        d += n;
        ALIGN_DEST_TO8_DN(d,s,n);
        n -= 8;
        while (n >= 0) {
                unsigned long tmp;
                s -= 8;
                __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s));
                n -= 8;
                d -= 8;
                *(unsigned long *) d = tmp;
        }
        n += 8;
        DO_REST_ALIGNED_DN(d,s,n);
}

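/*
 * Note on the dispatch below: (dest ^ src) & 7 is zero exactly when dest
 * and src have the same offset within a quadword (e.g. addresses ending in
 * 13 and 5 are both 5 mod 8).  In that case aligning dest to 8 bytes also
 * aligns src, so the ldq/stq path applies; otherwise the ldq_u/extql/extqh
 * path handles the mismatched source alignment.
 */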
void * memcpy(void * dest, const void *src, size_t n)
{
        if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
                __memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
                                     n);
                return dest;
        }
        __memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
        return dest;
}
