pgp/contrib/idea/idea.asm - annotate

Return to idea.asm CVS log
Up to [PGP] / pgp / contrib / idea
Annotation of pgp/contrib/idea/idea.asm, revision 1.1.1.1

1.1       root        1:        %PAGESIZE       59      ; Turbo assembler formatting codes
                      2:        %BIN    13
                      3:        %LINUM  3
                      4: 
                      5: ; Copyright (c) 1993 Colin Plumb.  This code may be freely
                      6: ; distributed under the terms of the GNU General Public Licence.
                      7: 
                      8:        .model large
                      9:        .code
                     10: 
                     11: ; A core operation in IDEA is multiplication modulo 65537.
                     12: ; The valid inputs, 1 through 65536 inclusive, are represented in
                     13: ; 16-bit registers modulo 65536.  I.e. a value of 0 means 65536,
                     14: ; or -1.  Thus, we need to test for that specially.  -x, modulo
                     15: ; 65537, is 65537-x = 1-x.
                     16: ; For any other number, represent the product as a*65536+b.  Since
                     17: ; 65536 = -1 (mod 65537), this is the same number as b-a.  Should
                     18: ; this result be negative (generate a borrow), -n mod 65537 = 1-n
                     19: ; mod 65536.  Or in other words, if you add the borrow bit back on,
                     20: ; you get the right answer.
                     21: 
                     22: ; This is what the assembly code does.  It forms a zero, and adds
                     23: ; that on with carry.
                     24: 
                     25: ; Another useful optimisation takes advantage of the fact that
                     26: ; a and b are equal only if the answer is congruent to 0 mod 65537.
                     27: ; Since 65537 is prime, this happens only if one of the inputs is
                     28: ; congruent to 0 mod 65537.  Since the inputs are all less than 65537,
                     29: ; this means it must have been zero.
                     30: 
                     31: ; The code below tests for a zero result of the subtraction, and if
                     32: ; one arises, it branches out of line to figure out what happened.
                     33: 
                     34: 
                     35: ; This code implements the IDEA encryption algorithm.
                     36: ; It follows in pseudo-C, where the * operator operates
                     37: ; modulo 65537, as Idea needs.  (If you don't understand,
                     38: ; learn IDEA better.)
                     39: 
                     40: ; IDEA works on 16-bit units.  If you're processing bytes,
                     41: ; it's defined to be big-endian, so an Intel machine needs to
                     42: ; swap the bytes around.
                     43: 
                     44: ; void Idea(u_int16 *in, u_int16 *out, u_int16 *key)
                     45: ; {
                     46: ;      register u_int16 x0, x1, x2, x3, s1, s2, round;
                     47: ;
                     48: ;      x0 = *in++;  x1 = *in++;  x2 = *in++;  x3 = *in;
                     49: ;
                     50: ;      for (round = 0; round < 8; round++) {
                     51: ;              x0 *= *key++;
                     52: ;              x1 += *key++;
                     53: ;              x2 += *key++;
                     54: ;              x3 *= *key++;
                     55: ;
                     56: ;              s1  = x1;  s2  = x2;
                     57: ;              x2 ^= x0;  x1 ^= x3;
                     58: ;
                     59: ;              x2 *= *key++;
                     60: ;              x1 += x2;
                     61: ;              x1 *= *key++;
                     62: ;              x2 += x1;
                     63: ;
                     64: ;              x0 ^= x1;  x3 ^= x2;
                     65: ;              x1 ^= s2;  x2 ^= s1;
                     66: ;      }
                     67: ;      *out++ = x0 * *key++;
                     68: ;      *out++ = x2 + *key++;   /* Yes, this is x2, not x1 */
                     69: ;      *out++ = x1 + *key++;
                     70: ;      *out   = x3 * *key;
                     71: ; }
                     72: 
                     73: ; ds:si points to key, ax, dx are temps, args in bx, cx, di, bp
                     74: ; Trashes *all* registers.  direction flag must be clear.
                     75: ; Leaves es zero.
                     76: 
                     77: ; Since there is no spare register to hold the loop count, I make
                     78: ; clever use of the stack, pushing the start of the loop several
                     79: ; times and using a ret instruction to do the return.
                     80: 
                     81: ; Annoyingly, lods is fastest on 8086's, but other techniques are
                     82: ; best on 386's.  Well, that's what the manual says, but real
                     83: ; life is different.  USELODS wins on a 386SX, at least.
                     84: ; Leave it set for all platforms.
                     85: 
                     86: USELODS        equ     1
                     87: 
                     88: ; bp must be x0 for some of the code below to work
                     89: x0     equ     bp
                     90: x1     equ     bx
                     91: x2     equ     cx
                     92: x3     equ     di
                     93: ; di must be x3 for some of the code below to work
                     94: 
                     95: ;; Now, this is rather interesting.  We test for zero arguments
                     96: ;; after the multiply.  Assuming random inputs, one or both are
                     97: ;; zero (2^17-1)/2^32, or approximately 1/32768 of the time.
                     98: ;; Encryption in any feedback mode produces essentially random
                     99: ;; inputs, so average-case analysis is okay.  While we don't
                    100: ;; want the out-of-line code to waste time, it is not worth
                    101: ;; slowing down the in-line case to speed it up.
                    102: ;;
                    103: ;; Basically, we start inverting the source x, and if that was 0,
                    104: ;; we use the inverse of the key instead.
                    105: 
                    106: Core1Z:                         ; round 1 of the pair, x0 * key: lo == hi, so an operand was 0 (= 65536 = -1)
                    107:        neg     x0              ; x0 = -x0; ZF set iff x0 was zero
                    108:        jnz     Core1Za         ; x0 nonzero, so the key was zero: answer is 1 - x0
                    109: if USELODS
                    110:        sub     x0,[si-2]       ; x0 was zero: answer is 1 - key (lodsw already stepped si past it)
                    111: else
                    112:        sub     x0,[si]
                    113: endif
                    114: Core1Za:
                    115:        inc     x0              ; the "+1" of 1 - operand
                    116:        jmp     Core1done
                    117: Core2Z:                         ; same fixup for x3 * key
                    118:        neg     x3
                    119:        jnz     Core2Za
                    120: if USELODS
                    121:        sub     x3,[si-2]
                    122: else
                    123:        sub     x3,[si+6]
                    124: endif
                    125: Core2Za:
                    126:        inc     x3
                    127:        jmp     Core2done
                    128: Core3Z:                         ; same fixup for x2 * key
                    129:        neg     x2
                    130:        jnz     Core3Za
                    131: if USELODS
                    132:        sub     x2,[si-2]
                    133: else
                    134:        sub     x2,[si+8]
                    135: endif
                    136: Core3Za:
                    137:        inc     x2
                    138:        jmp     Core3done
                    139: Core4Z:                         ; same fixup for x1 * key
                    140:        neg     x1
                    141:        jnz     Core4Za
                    142: if USELODS
                    143:        sub     x1,[si-2]
                    144: else
                    145:        sub     x1,[si+10]
                    146: endif
                    147: Core4Za:
                    148:        inc     x1
                    149:        jmp     Core4done
                    150: 
                    151: ; We need a constant 0 that we can move into a register without affecting
                    152: ; the carry flag (as the classic xor ax,ax is wont to do), so we use the
                    153: ; es register for a constant 0 source.  This is okay even in protected
                    154: ; mode.  (I *told* you this was tricky code!)
                    155: 
                    156: ; BTW, since you wanted to know, this is 8 + 78*4 + 16 = 336 instructions.
                    157: 
                    158: Core   proc    near            ; in: ds:si = key, x0..x3 = block; out: x0, x1, x2 and dx (4th word)
                    159:        xor     ax,ax
                    160:        mov     es,ax           ; es = constant 0, readable without touching flags
                    161:        mov     ax,OFFSET Finish
                    162:        push    ax              ; last "return" goes to the output transform
                    163:        mov     ax,OFFSET Coreloop
                    164:        push    ax      ; Loop 3 times, then return
                    165:        push    ax
                    166:        push    ax
                    167: 
                    168: Coreloop:                       ; one pass = two IDEA rounds; fall-through + 3 rets = 8 rounds
                    169: if USELODS
                    170:        lodsw
                    171: else
                    172:        mov     ax,[si]         ; x0 *= *key++
                    173: endif
                    174:        mul     x0              ; dx:ax = key * x0
                    175:        sub     ax,dx           ; lo - hi; CF = borrow, ZF iff lo == hi
                    176:        jz      Core1Z          ; lo == hi only when an operand was 0: fix up out of line
                    177:        mov     x0,es           ; x0 = 0, carry preserved
                    178:        adc     x0,ax           ; x0 = (lo - hi) + borrow
                    179: Core1done:
                    180: 
                    181: if USELODS
                    182:        lodsw
                    183:        add     x1,ax
                    184:        lodsw
                    185:        add     x2,ax
                    186: else
                    187:        add     x1,[si+2]       ; x1 += *key++
                    188:        add     x2,[si+4]       ; x2 += *key++
                    189: endif
                    190: 
                    191: if USELODS
                    192:        lodsw
                    193: else
                    194:        mov     ax,[si+6]       ; x3 *= *key++
                    195: endif
                    196:        mul     x3
                    197:        sub     ax,dx
                    198:        jz      Core2Z
                    199:        mov     x3,es
                    200:        adc     x3,ax
                    201: Core2done:
                    202: 
                    203:        push    x1              ; s1 = x1
                    204:        push    x2              ; s2 = x2
                    205: 
                    206:        xor     x1,x3           ; x1 ^= x3
                    207:        xor     x2,x0           ; x2 ^= x0
                    208: 
                    209: if USELODS
                    210:        lodsw
                    211: else
                    212:        mov     ax,[si+8]       ; x2 *= *key++
                    213: endif
                    214:        mul     x2
                    215:        sub     ax,dx
                    216:        jz      Core3Z
                    217:        mov     x2,es
                    218:        adc     x2,ax
                    219: Core3done:
                    220: 
                    221:        add     x1,x2           ; x1 += x2
                    222: 
                    223: if USELODS
                    224:        lodsw
                    225: else
                    226:        mov     ax,[si+10]      ; x1 *= *key++
                    227: endif
                    228:        mul     x1
                    229:        sub     ax,dx
                    230:        jz      Core4Z
                    231:        mov     x1,es
                    232:        adc     x1,ax
                    233: Core4done:
                    234: 
                    235:        add     x2,x1           ; x2 += x1
                    236: 
                    237:        xor     x0,x1           ; x0 ^= x1
                    238:        xor     x3,x2           ; x3 ^= x2
                    239: 
                    240:        pop     dx              ; s2 (pushed last)
                    241:        xor     x1,dx           ; x1 ^= s2
                    242:        pop     dx              ; s1
                    243:        xor     x2,dx           ; x2 ^= s1
                    244: 
                    245: ; Second unrolling of loop
                    246: if USELODS
                    247:        lodsw
                    248: else
                    249:        mov     ax,[si+12]      ; x0 *= *key++
                    250: endif
                    251:        mul     x0
                    252:        sub     ax,dx
                    253:        jz      Core5Z
                    254:        mov     x0,es
                    255:        adc     x0,ax
                    256: Core5done:
                    257: 
                    258: if USELODS
                    259:        lodsw
                    260:        add     x1,ax
                    261:        lodsw
                    262:        add     x2,ax
                    263: else
                    264:        add     x1,[si+14]      ; x1 += *key++
                    265:        add     x2,[si+16]      ; x2 += *key++
                    266: endif
                    267: 
                    268: if USELODS
                    269:        lodsw
                    270: else
                    271:        mov     ax,[si+18]      ; x3 *= *key++
                    272: endif
                    273:        mul     x3
                    274:        sub     ax,dx
                    275:        jz      Core6Z
                    276:        mov     x3,es
                    277:        adc     x3,ax
                    278: Core6done:
                    279: 
                    280:        push    x1              ; s1 = x1
                    281:        push    x2              ; s2 = x2
                    282: 
                    283:        xor     x1,x3           ; x1 ^= x3
                    284:        xor     x2,x0           ; x2 ^= x0
                    285: 
                    286: if USELODS
                    287:        lodsw
                    288: else
                    289:        mov     ax,[si+20]      ; x2 *= *key++
                    290: endif
                    291:        mul     x2
                    292:        sub     ax,dx
                    293:        jz      Core7Z
                    294:        mov     x2,es
                    295:        adc     x2,ax
                    296: Core7done:
                    297: 
                    298:        add     x1,x2           ; x1 += x2
                    299: 
                    300: if USELODS
                    301:        lodsw
                    302: else
                    303:        mov     ax,[si+22]      ; x1 *= *key++
                    304: endif
                    305:        mul     x1
                    306:        sub     ax,dx
                    307:        jz      Core8Z
                    308:        mov     x1,es
                    309:        adc     x1,ax
                    310: Core8done:
                    311: 
                    312:        add     x2,x1           ; x2 += x1
                    313: 
                    314:        xor     x0,x1           ; x0 ^= x1
                    315:        xor     x3,x2           ; x3 ^= x2
                    316: 
                    317:        pop     dx
                    318:        xor     x1,dx           ; x1 ^= s2
                    319:        pop     dx
                    320:        xor     x2,dx           ; x2 ^= s1
                    321: 
                    322: ife USELODS
                    323:        lea     si,[si+24]      ; non-lods build: step over the 12 key words of these two rounds
                    324: endif
                    325: 
                    326:        ret     ; Used as a loop instruction!
                    327: 
                    328: Core5Z:                         ; zero-operand fixups for the second unrolled round (cf. Core1Z..Core4Z)
                    329:        neg     x0
                    330:        jnz     Core5Za
                    331: if USELODS
                    332:        sub     x0,[si-2]
                    333: else
                    334:        sub     x0,[si+12]
                    335: endif
                    336: Core5Za:
                    337:        inc     x0
                    338:        jmp     Core5done
                    339: Core6Z:
                    340:        neg     x3
                    341:        jnz     Core6Za
                    342: if USELODS
                    343:        sub     x3,[si-2]
                    344: else
                    345:        sub     x3,[si+18]
                    346: endif
                    347: Core6Za:
                    348:        inc     x3
                    349:        jmp     Core6done
                    350: Core7Z:
                    351:        neg     x2
                    352:        jnz     Core7Za
                    353: if USELODS
                    354:        sub     x2,[si-2]
                    355: else
                    356:        sub     x2,[si+20]
                    357: endif
                    358: Core7Za:
                    359:        inc     x2
                    360:        jmp     Core7done
                    361: Core8Z:
                    362:        neg     x1
                    363:        jnz     Core8Za
                    364: if USELODS
                    365:        sub     x1,[si-2]
                    366: else
                    367:        sub     x1,[si+22]
                    368: endif
                    369: Core8Za:
                    370:        inc     x1
                    371:        jmp     Core8done
                    372: Core9Z:                         ; zero-operand fixup for the first multiply in Finish
                    373:        neg     x0
                    374:        jnz     Core9Za
                    375: if USELODS
                    376:        sub     x0,[si-2]
                    377: else
                    378:        sub     x0,[si]
                    379: endif
                    380: Core9Za:
                    381:        inc     x0
                    382:        jmp     Core9done
                    383: ; Special: compute into dx (zero on entry)
                    384: Core10Z:
                    385:        sub     dx,x3           ; dx = -x3; ZF set iff x3 was zero
                    386:        jnz     Core10Za
                    387: if USELODS
                    388:        sub     dx,[si-2]
                    389: else
                    390:        sub     dx,[si+6]
                    391: endif
                    392: Core10Za:
                    393:        inc     dx
                    394: ;      jmp     Core10done
                    395:        ret                     ; Core10done is only a ret, so return from here directly
                    396: 
                    397: 
                    398: Finish:                         ; output transform, reached by the last ret of the loop
                    399: if USELODS
                    400:        lodsw
                    401: else
                    402:        mov     ax,[si]         ; x0 *= *key++
                    403: endif
                    404:        mul     x0
                    405:        sub     ax,dx
                    406:        jz      Core9Z
                    407:        mov     x0,es
                    408:        adc     x0,ax
                    409: Core9done:
                    410: 
                    411:        xchg    x1,x2           ; output word 1 comes from x2, word 2 from x1 (see pseudo-C)
                    412: if USELODS
                    413:        lodsw
                    414:        add     x1,ax
                    415:        lodsw
                    416:        add     x2,ax
                    417: else
                    418:        add     x1,[si+2]       ; x1 += *key++
                    419:        add     x2,[si+4]       ; x2 += *key++
                    420: endif
                    421: 
                    422: ; This is special: compute into dx, not x3
                    423: if USELODS
                    424:        lodsw
                    425: else
                    426:        mov     ax,[si+6]       ; x3 *= *key++
                    427: endif
                    428:        mul     x3
                    429:        sub     ax,dx
                    430:        mov     dx,es           ; dx = 0; mov leaves the ZF/CF of the sub intact
                    431:        jz      Core10Z
                    432:        adc     dx,ax           ; fourth output word ends up in dx; x3 itself is not updated
                    433: Core10done:
                    434: 
                    435:        ret                     ; loop addresses are all consumed: this returns to Core's caller
                    436: 
                    437:        endp
                    438: 
                    439: 
                    440: ; Args are in, out, key (far pointers).  Encrypts the 8-byte block at in to out.
                    441:        public  _Idea2
                    442: _Idea2 proc far
                    443:        cld                     ; lodsw/stosw must step upwards
                    444:        push    bp      ; Args start at [bp+6]
                    445:        mov     bp,sp
                    446:        push    si
                    447:        push    di
                    448:        push    ds      ; 6 more bytes here, so args are at [sp+12]
                    449:        lds     si,[bp+6]       ; in
                    450:        lodsw
                    451:        xchg    ah,al           ; IDEA words are big-endian: swap on load
                    452:        mov     dx,ax           ; park x0 in dx: bp (x0) is still the frame pointer
                    453:        lodsw
                    454:        xchg    ah,al
                    455:        mov     x1,ax
                    456:        lodsw
                    457:        xchg    ah,al
                    458:        mov     x2,ax
                    459:        lodsw
                    460:        xchg    ah,al
                    461:        mov     x3,ax
                    462:        lds     si,[bp+14]      ; key
                    463: 
                    464:        mov     x0,dx           ; last use of the frame pointer is done, bp becomes x0
                    465: 
                    466:        call    Core            ; returns the block in x0, x1, x2 and dx (not x3)
                    467: 
                    468:        mov     ax,x0           ; rescue x0 before bp is reused for addressing
                    469:        mov     bp,sp
                    470:        les     di,[bp+16]      ; out = [sp+16]; this overwrites x3 (di)
                    471:        xchg    ah,al           ; swap back to big-endian on store
                    472:        stosw
                    473:        mov     ax,x1
                    474:        xchg    ah,al
                    475:        stosw
                    476:        mov     ax,x2
                    477:        xchg    ah,al
                    478:        stosw
                    479:        mov     ax,dx           ; 4th word is in dx; x3 (di) now holds the out offset
                    480:        xchg    ah,al
                    481:        stosw
                    482: 
                    483:        pop     ds
                    484:        pop     di
                    485:        pop     si
                    486:        pop     bp
                    487: 
                    488:        ret
                    489: 
                    490:        endp
                    491: 
                    492: ; Okay, the basic plan for the CFB kernel is
                    493: ; get x0,x1,x2,x3
                    494: ; get key pointer
                    495: ; call core
                    496: ; get buffer pointers
                    497: ;Loop:
                    498: ; lodsw
                    499: ; xor  ax,x0
                    500: ; mov   x0,ax
                    501: ; stosw
                    502: ; lodsw
                    503: ; xor  ax,x1
                    504: ; mov  x1,ax
                    505: ; stosw
                    506: ; lodsw
                    507: ; xor  ax,x2
                    508: ; mov  x2,ax
                    509: ; stosw
                    510: ; lodsw
                    511: ; xor  ax,x3
                    512: ; mov  x3,ax
                    513: ; stosw
                    514: ; push buffer pointers
                    515: ; get key pointer
                    516: ; call core
                    517: ; pop buffer pointers
                    518: ; loop
                    519: ; lodsw/xor/etc.
                    520: ;
                    521: ;
                    522: ; This function is designed to go in the middle of a byte-granularity
                    523: ; CFB engine.  It performs "len" encryptions of the IV, encrypting
                    524: ; 8*(len-1) bytes from the source to the destination.  The idea is
                    525: ; that you first xor any odd leading bytes, then call this function,
                    526: ; then xor up to 8 trailing bytes.
                    527: 
                    528: ; The main loop in this is 38 instructions, plus the 336 for the core
                    529: ; makes 374 total.  That's 46.75 instructions per byte.
                    530: ; (It's the same for IdeaCFBx)
                    531: 
                    532: ; IV, key, plain, cipher, len
                    533:        public  _IdeaCFB
                    534: _IdeaCFB proc far       ; Args are at [sp+4]
                    535:        cld
                    536:        push    bp
                    537:        push    si
                    538:        push    di
                    539:        push    ds      ; 8 more bytes here, so args are at [sp+12]
                    540: ; To be precise, IV is at 12, key at 16, plain at 20,
                    541: ; cipher at 24 and len at 28
                    542:        mov     bp,sp
                    543:        lds     si,[bp+12]      ; IV
                    544: ; Load and byte-swap IV
                    545:        mov     ax,[si]
                    546:        xchg    ah,al
                    547:        mov     x1,[si+2]
                    548:        mov     x2,[si+4]
                    549:        xchg    bh,bl           ; x1 is bx
                    550:        xchg    ch,cl           ; x2 is cx
                    551:        mov     dx,[si+6]
                    552:        xchg    dh,dl
                    553: 
                    554:        lds     si,[bp+16]      ; Key
                    555:        mov     x0,ax           ; bp is free now: it becomes x0
                    556:        mov     x3,dx
                    557: 
                    558:        call    Core            ; encrypt the IV; results in x0, x1, x2 and dx
                    559: IdeaCFBLoop:
                    560: ;      mov     ax,x0
                    561: ;      mov     bp,sp
                    562: ;      dec     WORD PTR [bp+28]        ; Decrement count
                    563: ;      jz      IdeaCFBEnd
                    564: ;      lds     si,[bp+20]
                    565: ;      les     di,[bp+24]
                    566: ;      mov     x0,ax
                    567: ; Alternate code: (which is faster?  Two moves or three segment overrides?)
                    568:        mov     si,sp
                    569:        dec     WORD PTR ss:[si+28]     ; len counts encryptions, so one fewer block is xored
                    570:        jz      IdeaCFBEnd
                    571:        les     di,ss:[si+24]   ; cipher (destination)
                    572:        lds     si,ss:[si+20]   ; plain (source)
                    573: 
                    574:        lodsw
                    575:        xchg    ah,al
                    576:        xor     ax,x0           ; ciphertext word = plaintext ^ encrypted IV word
                    577:        mov     x0,ax           ; ciphertext is fed back as the next Core input
                    578:        xchg    ah,al
                    579:        stosw
                    580:        lodsw
                    581:        xchg    ah,al
                    582:        xor     ax,x1
                    583:        mov     x1,ax
                    584:        xchg    ah,al
                    585:        stosw
                    586:        lodsw
                    587:        xchg    ah,al
                    588:        xor     ax,x2
                    589:        mov     x2,ax
                    590:        xchg    ah,al
                    591:        stosw
                    592:        lodsw
                    593:        xchg    ah,al
                    594:        xor     ax,dx           ; Core leaves the fourth word in dx, not x3
                    595:        mov     dx,ax
                    596:        xchg    ah,al
                    597:        stosw
                    598: 
                    599: ;      mov     ax,x0
                    600: ;      mov     bp,sp
                    601: ;      mov     [bp+20],si      ; Save source offset
                    602: ;      mov     [bp+24],di      ; Save destination offset
                    603: ;      lds     si,[bp+16]      ; Key
                    604: ;      mov     x0,ax           ; Get x0 in place for another iteration
                    605: ; Alternate code for the above: (which is faster?  One move or three ss:?)
                    606:        mov     ax,si
                    607:        mov     si,sp
                    608:        mov     ss:[si+20],ax   ; save advanced source offset back into the plain argument
                    609:        mov     ss:[si+24],di   ; save advanced destination offset back into the cipher argument
                    610:        lds     si,ss:[si+16]   ; Key
                    611: 
                    612:        mov     x3,dx           ; Get x3 in place
                    613:        mov     ax,OFFSET IdeaCFBLoop
                    614:        push    ax              ; hand-made return address: Core's final ret lands on IdeaCFBLoop
                    615:        jmp     Core
                    616: 
                    617: IdeaCFBEnd:
                    618: ;      lds     si,[bp+12]
                    619:        lds     di,ss:[si+12]   ; Get IV for writing back
                    620: 
                    621:        mov     ax,x0
                    622:        xchg    ah,al
                    623:        mov     [di],ax         ; Use stosw?
                    624:        xchg    bh,bl
                    625:        xchg    ch,cl
                    626:        mov     [di+2],x1
                    627:        mov     [di+4],x2
                    628:        xchg    dh,dl
                    629:        mov     [di+6],dx
                    630: 
                    631:        pop     ds
                    632:        pop     di
                    633:        pop     si
                    634:        pop     bp
                    635: 
                    636:        ret
                    637: 
                    638:        endp
                    639: 
                    640: ; This decoding step is similar, except that instead of
                    641: ;      lods
                    642: ;      xor     x0,ax
                    643: ;      mov     ax,x0
                    644: ;      stos
                    645: ; the feedback step is
                    646: ;      lods
                    647: ;      xchg    x0,ax
                    648: ;      xor     ax,x0
                    649: ;      stos
                    650: 
                    651: ; CFB decrypt.  Args in order: IV, key, cipher, plain (far pointers), len (word)
                    652:        public  _IdeaCFBx       ; Export the symbol for the linker
                    653: _IdeaCFBx proc far       ; Far return address is 4 bytes, so args are at [sp+4]
                    654:        cld                     ; lodsw/stosw below must step si/di upward
                    655:        push    bp
                    656:        push    si
                    657:        push    di
                    658:        push    ds      ; 4 more words (8 bytes) here, so args are at [sp+12]
                    659:        mov     bp,sp           ; IV at +12, key +16, cipher +20, plain +24, len +28
                    660:        lds     si,[bp+12]      ; ds:si -> IV
                    661: ; Load and byte-swap IV (x1/x2 are swapped via bh/bl and ch/cl: presumably bx/cx, defined out of view)
                    662:        mov     ax,[si]
                    663:        xchg    ah,al
                    664:        mov     x1,[si+2]
                    665:        mov     x2,[si+4]
                    666:        xchg    bh,bl
                    667:        xchg    ch,cl
                    668:        mov     dx,[si+6]
                    669:        xchg    dh,dl
                    670: ; Words 0 and 3 wait in ax/dx until the last [bp+..] access (x0/x3 presumably alias registers used above)
                    671:        lds     si,[bp+16]      ; ds:si -> key
                    672:        mov     x0,ax
                    673:        mov     x3,dx
                    674: 
                    675:        call    Core            ; Defined elsewhere in the file; result is read from x0, x1, x2, dx below
                    676: IdeaCFBxLoop:                   ; Reached after every Core run (call above, or pushed return address below)
                    677: ;      mov     ax,x0
                    678: ;      mov     bp,sp
                    679: ;      dec     WORD PTR [bp+28]        ; Decrement count
                    680: ;      jz      IdeaCFBxEnd
                    681: ;      lds     si,[bp+20]
                    682: ;      les     di,[bp+24]
                    683: ;      mov     x0,ax
                    684: ; Alternate code: (which is faster?  Two moves or three segment overrides)
                    685:        mov     si,sp                   ; si = frame base; ss: overrides needed since [si] defaults to ds
                    686:        dec     WORD PTR ss:[si+28]     ; --len, in place in the argument slot
                    687:        jz      IdeaCFBxEnd     ; NOTE(review): tested before each block, so len-1 blocks run; len=0 wraps
                    688:        les     di,ss:[si+24]   ; es:di -> plain (output)
                    689:        lds     si,ss:[si+20]   ; ds:si -> cipher (input); loaded last because it overwrites si
                    690: ; Per word: state register takes the cipher word (feedback), old state XOR cipher is stored as plain
                    691:        lodsw                   ; ax = next cipher word
                    692:        xchg    ah,al           ; Byte-swap into the order used for the IV above
                    693:        xchg    x0,ax           ; x0 = cipher word, ax = previous x0
                    694:        xor     ax,x0           ; ax = previous x0 XOR cipher word
                    695:        xchg    ah,al           ; Swap back to memory byte order
                    696:        stosw                   ; Store plain word
                    697:        lodsw                   ; Same sequence for x1
                    698:        xchg    ah,al
                    699:        xchg    x1,ax
                    700:        xor     ax,x1
                    701:        xchg    ah,al
                    702:        stosw
                    703:        lodsw                   ; Same sequence for x2
                    704:        xchg    ah,al
                    705:        xchg    x2,ax
                    706:        xor     ax,x2
                    707:        xchg    ah,al
                    708:        stosw
                    709:        lodsw                   ; Same sequence for the fourth word,
                    710:        xchg    ah,al
                    711:        xchg    dx,ax           ; which is held in dx here rather than x3
                    712:        xor     ax,dx
                    713:        xchg    ah,al
                    714:        stosw
                    715: ; si and di have advanced past this block: save both offsets and reload the key pointer
                    716: ;      mov     ax,x0
                    717: ;      mov     bp,sp
                    718: ;      mov     [bp+20],si      ; Save source offset
                    719: ;      mov     [bp+24],di      ; Save destination offset
                    720: ;      lds     si,[bp+16]      ; Key
                    721: ;      mov     x0,ax           ; Get x0 in place for another iteration
                    722: ; Alternate code for the above: (which is faster?  One move or three ss:?)
                    723:        mov     ax,si           ; Keep the source offset while si becomes the frame base
                    724:        mov     si,sp
                    725:        mov     ss:[si+20],ax   ; Save source offset
                    726:        mov     ss:[si+24],di   ; Save destination offset
                    727:        lds     si,ss:[si+16]   ; ds:si -> key
                    728: 
                    729:        mov     x3,dx           ; Get x3 in place
                    730:        mov     ax,OFFSET IdeaCFBxLoop  ; Push the loop head as the return address and jump:
                    731:        push    ax
                    732:        jmp     Core            ; acts as a call to Core that returns to IdeaCFBxLoop
                    733: 
                    734: IdeaCFBxEnd:                    ; Entered only from the jz above, so si still equals sp
                    735: ;      lds     si,[bp+12]      ; (bp-based form matching the commented-out loop code)
                    736:        lds     di,ss:[si+12]   ; Get IV for writing back
                    737: ; Write the current state (x0, x1, x2, dx) back to the IV, byte-swapped as on load
                    738:        mov     ax,x0
                    739:        xchg    ah,al
                    740:        mov     [di],ax         ; Use stosw?
                    741:        xchg    bh,bl
                    742:        xchg    ch,cl
                    743:        mov     [di+2],x1
                    744:        mov     [di+4],x2
                    745:        xchg    dh,dl
                    746:        mov     [di+6],dx       ; Fourth word comes from dx, not x3
                    747: 
                    748: 
                    749:        pop     ds              ; Restore saved registers in reverse push order
                    750:        pop     di
                    751:        pop     si
                    752:        pop     bp
                    753: 
                    754:        ret                     ; No pop count: the arguments are left on the stack
                    755: 
                    756:        endp                    ; End of _IdeaCFBx
                    757: 
                    758: 
                    759: 
                    760: 
                    761:        end
unix.superglobalmegacorp.com
This archive runs on limited infrastructure. Preserving old code on modern bandwidth. Automated agents are requested to crawl responsibly.