Annotation of qemu/roms/qemu-palcode/memcpy.c, revision 1.1

1.1     ! root        1: /*
        !             2:  *  linux/arch/alpha/lib/memcpy.c
        !             3:  *
        !             4:  *  Copyright (C) 1995  Linus Torvalds
        !             5:  */
        !             6: 
        !             7: /*
        !             8:  * This is a reasonably optimized memcpy() routine.
        !             9:  */
        !            10: 
        !            11: #include <string.h>
        !            12: 
        !            13: /*
        !            14:  * Note that the C code is written to be optimized into good assembly. However,
        !            15:  * at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
        !            16:  * explicit compare against 0 (instead of just using the proper "blt reg, xx" or
        !            17:  * "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
        !            18:  */
        !            19: 
        !            20: /*
        !            21:  * Copy single bytes upward from s to d until d is 8-byte aligned; d, s and n
        !            22:  * are updated in place. This should be done in one go with ldq_u*2/mask/stq_u. Do it with a macro so that we can fix it up later..
        !            23:  */
        !            24: #define ALIGN_DEST_TO8_UP(d,s,n) \
        !            25:        while (d & 7) { /* low three bits set: d is not yet on an 8-byte boundary */ \
        !            26:                if (n <= 0) return; /* bare return: leaves the *calling* function, which must be void */ \
        !            27:                n--; \
        !            28:                *(char *) d = *(char *) s; /* d and s are integer addresses, hence the casts */ \
        !            29:                d++; s++; \
        !            30:        }
        !            31: #define ALIGN_DEST_TO8_DN(d,s,n) /* downward variant: d and s point one past the next byte to copy */ \
        !            32:        while (d & 7) { /* until d sits on an 8-byte boundary */ \
        !            33:                if (n <= 0) return; /* bare return: leaves the *calling* function, which must be void */ \
        !            34:                n--; \
        !            35:                d--; s--; /* step back first, then copy the byte now addressed */ \
        !            36:                *(char *) d = *(char *) s; \
        !            37:        }
        !            38: 
        !            39: /*
        !            40:  * Copy the remaining n bytes one at a time, upward; a no-op when n <= 0. This should similarly be done with ldq_u*2/mask/stq. The destination
        !            41:  * is aligned, but we don't fill in a full quad-word
        !            42:  */
        !            43: #define DO_REST_UP(d,s,n) \
        !            44:        while (n > 0) { /* d, s and n are modified in place; n ends at 0 if it started positive */ \
        !            45:                n--; \
        !            46:                *(char *) d = *(char *) s; \
        !            47:                d++; s++; \
        !            48:        }
        !            49: #define DO_REST_DN(d,s,n) /* downward variant: d and s point one past the next byte to copy */ \
        !            50:        while (n > 0) { /* a no-op when n <= 0; d, s and n are modified in place */ \
        !            51:                n--; \
        !            52:                d--; s--; /* step back first, then copy the byte now addressed */ \
        !            53:                *(char *) d = *(char *) s; \
        !            54:        }
        !            55: 
        !            56: /*
        !            57:  * Tail copy for the aligned routines. This should be done with ldq/mask/stq. The source and destination are
        !            58:  * aligned, but we don't fill in a full quad-word
        !            59:  */
        !            60: #define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n) /* currently just the byte-wise upward loop */
        !            61: #define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n) /* currently just the byte-wise downward loop */
        !            62: 
        !            63: /*
        !            64:  * This does unaligned memory copies. We want to avoid storing to
        !            65:  * an unaligned address, as that would do a read-modify-write cycle.
        !            66:  * We also want to avoid double-reading the unaligned reads.
        !            67:  * d and s are the destination and source addresses as integers, n the byte count; copies upward.
        !            68:  * Note the ordering to try to avoid load (and address generation) latencies.
        !            69:  */
        !            70: static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
        !            71:                                          long n)
        !            72: {
        !            73:        ALIGN_DEST_TO8_UP(d,s,n); /* byte copies until d is 8-byte aligned; returns from here if n runs out */
        !            74:        n -= 8;                 /* to avoid compare against 8 in the loop */
        !            75:        if (n >= 0) { /* at least one full 8-byte store is left */
        !            76:                unsigned long low_word, high_word;
        !            77:                __asm__("ldq_u %0,%1":"=r" (low_word):"m" (*(unsigned long *) s)); /* NOTE(review): presumably loads the aligned quadword containing s - confirm against the Alpha manual */
        !            78:                do {
        !            79:                        unsigned long tmp;
        !            80:                        __asm__("ldq_u %0,%1":"=r" (high_word):"m" (*(unsigned long *)(s+8))); /* next source quadword; reused as low_word on the following pass */
        !            81:                        n -= 8;
        !            82:                        __asm__("extql %1,%2,%0" /* NOTE(review): presumably extracts the part of the low quadword selected by s's byte offset */
        !            83:                                :"=r" (low_word)
        !            84:                                :"r" (low_word), "r" (s));
        !            85:                        __asm__("extqh %1,%2,%0" /* NOTE(review): presumably the complementary part from the high quadword */
        !            86:                                :"=r" (tmp)
        !            87:                                :"r" (high_word), "r" (s));
        !            88:                        s += 8;
        !            89:                        *(unsigned long *) d = low_word | tmp; /* merge both parts into one 8-byte store to the aligned destination */
        !            90:                        d += 8;
        !            91:                        low_word = high_word; /* carry over so each source quadword is loaded only once */
        !            92:                } while (n >= 0);
        !            93:        }
        !            94:        n += 8; /* undo the bias from above: n is again the true number of bytes left */
        !            95:        DO_REST_UP(d,s,n); /* byte-wise copy of whatever is left */
        !            96: }
        !            97: 
        !            98: static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
        !            99:                                          long n) /* byte-wise copy of n bytes from s to d, highest address first */
        !           100: {
        !           101:        /* I don't understand AXP assembler well enough for this. -Tim */
        !           102:        s += n; /* move both addresses one past the end of their regions */
        !           103:        d += n;
        !           104:        while (n--) /* NOTE(review): stops only when n hits 0, so a negative n would not terminate here; no caller is visible in this file */
        !           105:                * (char *) --d = * (char *) --s;
        !           106: }
        !           107: 
        !           108: /*
        !           109:  * Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
        !           110:  * for the load-store. I don't know why, but it would seem that using a floating
        !           111:  * point register for the move seems to slow things down (very small difference,
        !           112:  * though).
        !           113:  * d and s are the destination and source addresses as integers, n the byte count; copies upward. memcpy() uses this when d and s agree in their low three bits.
        !           114:  * Note the ordering to try to avoid load (and address generation) latencies.
        !           115:  */
        !           116: static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
        !           117:                                        long n)
        !           118: {
        !           119:        ALIGN_DEST_TO8_UP(d,s,n); /* byte copies until d is 8-byte aligned; returns from here if n runs out */
        !           120:        n -= 8; /* bias n so the loop can test against 0 instead of 8 */
        !           121:        while (n >= 0) { /* at least 8 bytes left: move one quadword per pass */
        !           122:                unsigned long tmp;
        !           123:                __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s)); /* load through an integer register (see the comment above the function) */
        !           124:                n -= 8;
        !           125:                s += 8;
        !           126:                *(unsigned long *) d = tmp;
        !           127:                d += 8;
        !           128:        }
        !           129:        n += 8; /* undo the bias: n is again the true number of bytes left */
        !           130:        DO_REST_ALIGNED_UP(d,s,n); /* byte-wise copy of whatever is left */
        !           131: }
        !           132: static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
        !           133:                                        long n) /* copy n bytes from s to d, highest address first, a quadword at a time where possible */
        !           134: {
        !           135:        s += n; /* move both addresses one past the end of their regions */
        !           136:        d += n;
        !           137:        ALIGN_DEST_TO8_DN(d,s,n); /* byte copies until d is 8-byte aligned; returns from here if n runs out */
        !           138:        n -= 8; /* bias n so the loop can test against 0 instead of 8 */
        !           139:        while (n >= 0) { /* at least 8 bytes left: move one quadword per pass */
        !           140:                unsigned long tmp;
        !           141:                s -= 8;
        !           142:                __asm__("ldq %0,%1":"=r" (tmp):"m" (*(unsigned long *) s)); /* NOTE(review): presumably needs d and s to agree in their low three bits; no caller is visible in this file to confirm */
        !           143:                n -= 8;
        !           144:                d -= 8;
        !           145:                *(unsigned long *) d = tmp;
        !           146:        }
        !           147:        n += 8; /* undo the bias: n is again the true number of bytes left */
        !           148:        DO_REST_ALIGNED_DN(d,s,n); /* byte-wise copy of whatever is left */
        !           149: }
        !           150: 
        !           151: void * memcpy(void * dest, const void *src, size_t n) /* copy n bytes from src to dest, lowest address first; returns dest */
        !           152: {
        !           153:        if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) { /* same low three bits: aligning dest to 8 bytes aligns src as well */
        !           154:                __memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
        !           155:                                     n); /* n is converted from size_t to the helper's long parameter */
        !           156:                return dest;
        !           157:        }
        !           158:        __memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n); /* differing offsets: aligned stores, unaligned loads */
        !           159:        return dest;
        !           160: }

unix.superglobalmegacorp.com