1) Long Division by 9:
Data:
AL: lower address of the considered number.
AH: upper address of the considered number.
C1 = &1C71C71C.
C2 = 2*C1+1 = &38E38E39.
PM: pointer to the 9 products NA*C1 for 0 <= NA <= 8.
Temporary Registers:
NA: 4 MSBs of the current partial dividend before the division, remainder after the division.
NB: 32 LSBs of the current partial dividend before the division, quotient after the division.
TB: partial quotient (yielded by the 4 MSBs).
Unmodified Registers: AL, C1, C2, PM.
-- DIV9 --------------------------------------------------------------------
; DIV9: in-place division by 9 of the multi-word number stored at [AL..AH].
; Words are handled from address AH down to AL; the remainder NA left by one
; word becomes the high part of the partial dividend for the next word.
; Identity used: 2^32 = 9*C1 + 4, so NA*2^32 + NB = 9*(NA*C1) + (NB + 4*NA).
; AH is stepped down by the post-indexed store and ends below AL.
MOV NA, #0 ;Remainder = 0 at the beginning.
div9_loop LDR NB, [AH] ;LSBs of the dividend.
LDR TB, [PM, NA, LSL #2] ;\ TB = word number NA of the table at PM.
ADDS NB, NB, NA, LSL #2 ;| We get rid of the 4 MSBs of the dividend
SUBCS NB, NB, #36 ;| --> partial quotient TB (no remainder).
ADDCS TB, TB, #4 ;/ On carry: NB -= 36 (= 9*4) and TB += 4 compensate for the lost 2^32.
; The four shift-and-add steps below multiply NB by -7 * 65 * 4097 * (2^24 + 1),
; which is &38E38E39 (= C2) modulo 2^32, i.e. the inverse of 9 modulo 2^32.
SUB NB, NB, NB, LSL #3 ;\
ADD NB, NB, NB, LSL #6 ;| 32-bit multiplication
ADD NB, NB, NB, LSL #12 ;| NB * (2a+1).
ADD NB, NB, NB, LSL #24 ;/
; If the adjusted dividend was 9*q + r, NB is now q + r*C2 (mod 2^32).
; The compare/subtract steps below move r into NA and leave q in NB.
CMP NB, C2
SUBCS NB, NB, C2 ;If NB >= C2, remove C2.
SBC NA, C2, C1, LSL #1 ;NA = C flag (generated by the CMP).
CMP NB, C2, LSL #1
SUBCS NB, NB, C2, LSL #1 ;If NB >= 2*C2, remove 2*C2...
ADDCS NA, NA, #2 ;...and count 2 more in the remainder.
CMP NB, C2
SUBCS NB, NB, C2 ;If NB >= C2, remove C2...
ADDCS NA, NA, #1 ;...and count 1 more in the remainder.
CMP C1, NB ;C clear iff NB > C1.
SBCCC NB, NB, C1 ;NB -= C1 + 1 (C is clear, so SBC subtracts one more); C1 + 1 = 5*C2 mod 2^32.
ADDCC NA, NA, #5 ;...and count 5 more in the remainder.
ADD NB, NB, TB ;Add the partial quotient contributed by the previous remainder.
STR NB, [AH], #-4 ;Store the quotient.
CMP AH, AL
BCS div9_loop ;Loop...
----------------------------------------------------------------------------
2) Long Division by a 32-bit Number:
Data:
AH: upper address of the dividend.
BL: lower address of the quotient.
BH: upper address of the quotient.
SL: left shift count to normalize the divisor.
SR: 32-SL.
DV: normalized divisor.
QT: pointer to the table of the 8-bit partial quotients.
PL: pointer to the table of the LSBs of the products NQ.DV.
Temporary Registers:
(NA,NB): current normalized partial dividend.
NK: will be the 32-bit partial quotient (8-bit partial quotient for stage 1).
NQ: 8-bit partial quotient (stages 2, 3, 4).
NT: temporary.
Unmodified Registers: BL, SL, SR, DV, QT, PL.
-- LDIV --------------------------------------------------------------------
; LDIV: long division of a multi-word number by the normalized divisor DV.
; Each pass of ldiv_loop reads one dividend word from [AH] (post-decrement),
; builds one 32-bit quotient word from four 8-bit partial quotients and
; stores it at [BH] (post-decrement); the loop ends once BH is below BL.
; NA carries the running remainder from one dividend word to the next.
GBLA ldiv_n ;Assembly-time counter used by the WHILE block below.
MOV NA, #0 ;No remainder before the first word.
ldiv_loop LDR NB, [AH], #-4 ;Next digit.
ORR NA, NA, NB, LSR SR ;\ Normalize this digit --> the partial
MOV NB, NB, LSL SL ;/ dividend (NA,NB) is normalized.
; Stage 1: the table at QT is indexed by the 9 MSBs of NA; the byte read
; becomes the most significant byte of the final quotient word NK.
LDRB NK, [QT, NA, LSR #23] ;Minimal partial quotient (real partial quotient = NK or NK+1).
LDR NT, [PL, NK, LSL #2] ;NT = 32 LSBs of NQ.DV.
RSB NA, NT, NA, LSL #7 ;\ 8-bit shift of (NA,NB) to the left (normalization),
MOVS NA, NA, LSL #1 ;| and subtract NT at the same time (C = 1 if there is
ADD NA, NA, NB, LSR #24 ;| an overflow in the subtraction; in this case, the
MOV NB, NB, LSL #8 ;/ real quotient was NK+1).
CMPCC NA, DV ;If C was clear, C now tells whether NA >= DV.
SUBCS NA, NA, DV ;Correction of the partial dividend.
ADDCS NK, NK, #1 ;Correction of the partial quotient.
; Stages 2 to 4: same scheme, expanded at assembly time for ldiv_n = 2, 1, 0.
ldiv_n SETA 2 ;These instructions will be assembled 3 times.
WHILE ldiv_n >= 0
LDRB NQ, [QT, NA, LSR #23] ;Cf above (idem for the following instructions).
LDR NT, [PL, NQ, LSL #2] ;Note: NQ is used instead of NK.
RSB NA, NT, NA, LSL #7
MOVS NA, NA, LSL #1
ADD NA, NA, NB, LSR #24
[ ldiv_n != 0
MOV NB, NB, LSL #8 ;This instruction is useless in the last stage.
]
CMPCC NA, DV
SUBCS NA, NA, DV ;Correction of the partial dividend.
ADC NK, NQ, NK, LSL #8 ;Correction + update of the 32-bit partial quotient.
ldiv_n SETA ldiv_n - 1
WEND
STR NK, [BH], #-4 ;Store the 32-bit partial quotient.
CMP BH, BL
BCS ldiv_loop ;Loop...
----------------------------------------------------------------------------
3) Computation of the Table of the Partial Quotients:
Data:
QT: pointer to the table that will be created.
DV: normalized divisor.
Temporary Registers:
(NA,NB): multiple of 2*DV (NA: 9 MSBs of the dividend).
NQ: current partial quotient.
Unmodified Registers: DV.
-- CQUOT -------------------------------------------------------------------
; CQUOT: write the byte table of partial quotients at QT.
; For NQ = 0..255 the value NQ is stored once, then (NA,NB) is increased by
; 2*DV; whenever the 32-bit addition into NB carries out, NQ is stored a
; second time. QT is advanced by one byte per store.
MOV NQ, #0 ;First partial quotient.
MVN NB, #0 ;\ (NA,NB) starts with all bits set
MVN NA, #0 ;/ in both words.
cquot_loop STRB NQ, [QT], #1 ;Every quotient is stored at least once.
ADDS NB, NB, DV, LSL #1 ;NB += 32 LSBs of 2*DV, C = carry out.
STRCSB NQ, [QT], #1 ;On carry, the same quotient is stored a second time.
ADC NA, NA, #1 ;NA += 1 + C (the flags are left untouched).
ADD NQ, NQ, #1 ;Next quotient.
CMP NQ, #&100
BNE cquot_loop ;Loop until the 256 quotients have been handled.
----------------------------------------------------------------------------
4) Computation of the Table of the Products:
Data:
PL: pointer to the table that will be created.
DV: normalized divisor.
Temporary Registers:
T1: LSBs of the current product; the 7 MSBs are not represented.
T2: counter.
Unmodified Registers: DV.
-- CPROD -------------------------------------------------------------------
; CPROD: write the 256-word table of products at PL.
; Entry k holds k * (DV LSR #1) truncated to 32 bits; PL is advanced by
; 4 bytes per entry and T2 counts the remaining entries down to 0.
MOV T2, #256 ;Number of entries to write.
MOV T1, #0 ;Entry 0 is 0.
cprod_loop STR T1, [PL], #4 ;Store the current product and advance PL.
SUBS T2, T2, #1 ;One entry fewer to go (sets Z for the BNE below).
ADD T1, T1, DV, LSR #1 ;Next product; ADD leaves the flags of SUBS intact.
BNE cprod_loop ;Loop until the counter reaches 0.
----------------------------------------------------------------------------
5) Conversion base 2 --> base 10:
Data:
AH: upper address + 4 of the binary number.
TL: lower address of the temporary decimal number T.
TH: upper address + 4 of the temporary decimal number T.
RH: upper address + 4 of the result R, = TH + 4.
Z1: &01010101.
RL: lower address where the non-interlaced result will be stored.
Temporary Registers:
T1: misc.
T2: misc.
T3: misc.
T4: misc, used in the last part.
Z5: &05050505, calculated and used in the last part.
TA: current word of the binary number.
TK: current address of the least significant non-zero digits of T.
TT: pointer to a word of T or R.
CT: counter.
Unmodified Registers: TL, RH, Z1.
Note:
T4 and Z5 must be mapped onto 2 of the registers that hold AH, TH, TK, TT and CT (these five are not used in the last part, which is the only place where T4 and Z5 are needed).
The words of T must be initially equal to &FFFFFFFF.
The words of R must be initially equal to 0.
The last 4 digits of R will be equal to 0, so the lengths of the decimal numbers must
be equal to the wanted number of digits + 4 (there will still be rounding errors).
-- CONV --------------------------------------------------------------------
; CONV, first part: accumulate the decimal result R from the binary number.
; T and R are interleaved in memory (pointers step by 8 bytes, and the LDMIA
; below fetches a word of T together with the adjacent word of R); each byte
; holds one digit. For every bit of the binary number, read from the word
; just below AH downwards and from bit 31 to bit 0:
;   - if the bit is 1, T is added to R bytewise, without carry propagation;
;   - every 8 bits, the bytes of R are "cleaned" (80 removed from bytes >= 128,
;     8 carried into the next byte);
;   - T is divided by 2, digit by digit.
; The loop leaves through conv_norm when TH has come down to TL.
MOV T1, #&02000000
STR T1, [TH, #-4]! ;Initial value of T: 2.000.
MOV TK, TH
conv_outer1 LDR TA, [AH, #-4]! ;Read the next word of the binary number.
MOV CT, #31 ;Counter: 32 bits.
conv_inner1 MOVS TA, TA, LSL #1 ;Next bit of the binary number.
BCC conv_next1 ;Branch if zero (no addition).
MOV TT, TK ;T will be added to R.
conv_loop1 LDMIA TT!, {T1, T2} ;Read the next 4 digits of T and R.
ADD T1, T1, T2 ;Addition (without carry).
STR T1, [TT, #-4] ;Store the result.
CMP TH, TT
BCS conv_loop1 ;Loop while there still are non-zero digits of T.
conv_next1 TST CT, #7 ;Every 8 bits, the result must be "cleaned".
BNE conv_next2 ;Branch if the counter isn't a multiple of 8.
ADD TT, TK, #4 ;Least significant non-zero word of the result.
MOV T2, #0 ;Clear the carry (whose value is 0 or 8).
conv_loop2 LDR T1, [TT] ;Read the next 4 digits (a digit is in [0..199]).
ADD T1, T1, T2 ;Add the carry.
AND T2, T1, Z1, LSL #7 ;Bits 31, 23, 15, 7: 1 when the corresponding digit is >= 128.
ADD T1, T1, T2, LSL #4 ;\ Add 8 (carry) to the next digit when the bit has the
SUB T1, T1, T2, LSR #1 ;| value 1 (except for bit 31), and subtract 80 from the
SUB T1, T1, T2, LSR #3 ;/ digits corresponding to the non-zero bits.
MOV T2, T2, LSR #28 ;Carry: 8 if bit 31 had the value 1, otherwise 0.
CMP TH, TT ;Compare before TT is advanced by the store below (STR leaves the flags intact).
STR T1, [TT], #8 ;Store the result.
BCS conv_loop2 ;Loop while the most significant non-zero word of T was not reached...
CMP T2, #0
BNE conv_loop2 ;and while the carry is not 0.
conv_next2 MOV TT, TH ;T will be divided by 2.
MOV T2, #0 ;T2: "backcarry": 0 or &05000000.
LDR T1, [TT] ;Read the most significant word of T.
CMP T1, #1
SUBLS TH, TH, #8 ;Decrease TH if this word will become zero.
CMP TH, TL
BEQ conv_norm ;Branch if T will become zero (end of the conversion).
conv_loop3 MOVS T2, T2, LSR #27 ;C = backcarry, and T2 = 0.
MOVS T1, T1, RRX ;Divide by 2 (backcarries in bits 31, 23, 15, 7 and C).
AND T3, Z1, T1, LSR #7 ;T3: backcarries in bits 24, 16, 8 and 0.
BIC T1, T1, Z1, LSL #7 ;Clear the backcarries in T1.
ADD T3, T3, T3, LSL #2 ;T3 = 5 * T3.
ADD T1, T1, T3 ;Add 5 where there are backcarries.
STR T1, [TT], #-8 ;Store the result.
MOVCS T2, #&05000000 ;New backcarry.
LDR T1, [TT] ;Read the next 4 digits.
TEQ T1, #0
BPL conv_loop3 ;Loop if there are real digits (not &FFFFFFFF).
TEQ T2, #0 ;If backcarry = 0, no digit is concatenated to the number.
CMPNE TT, TL ;Idem if the wanted precision has been reached.
STRNE T2, [TK, #-8]! ;Otherwise digits 5, 0, 0, 0 are concatenated.
SUBS CT, CT, #1
BCS conv_inner1 ;Loop if all the bits of the word TA have not been read.
B conv_outer1 ;Loop.
; CONV, last part: normalize R and copy it to RL.
; Words are read from TL + 12 upwards in steps of 8 bytes, until TA passes RH.
; In each word, every byte is reduced to a decimal digit: multiples of 60
; (repeated while a byte is >= 64), then 40, 20 and 10 (the last step is
; repeated) are removed, and the corresponding 6, 4, 2 or 1 is added to the
; next byte. What leaves the top byte is accumulated in T2 and added to the
; next word. The normalized words are stored consecutively at RL.
conv_norm ADD TA, TL, #12 ;The result will be normalized and copied at RL.
ADD Z5, Z1, Z1, LSL #2 ;Z5 = &05050505.
MOV T2, #0 ;Clear the carry.
conv_outer2 LDR T1, [TA], #8 ;Read the next 4 digits.
ADD T1, T1, T2 ;Add the carry.
MOV T2, #0 ;Clear the new carry.
AND T3, T1, Z1, LSL #7 ;Bit 7 of each digit.
AND T4, T1, Z1, LSL #6 ;Bit 6 of each digit.
ORRS T3, T3, T4, LSL #1 ;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 64.
conv_inner2 ADDMI T2, T2, #6 ;Add 6 to the carry if the most significant digit is >= 64.
ADD T3, T3, T3, LSR #1 ;Each flagged byte of T3 becomes &C0.
ADD T1, T1, T3, LSL #3 ;Add 6 to the next digits when the bit is 1.
ADD T3, T3, T3, LSR #2 ;\ Subtract 60 when
SUB T1, T1, T3, LSR #2 ;/ the bit is 1.
AND T3, T1, Z1, LSL #7
AND T4, T1, Z1, LSL #6
ORRS T3, T3, T4, LSL #1 ;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 64.
BNE conv_inner2 ;Loop while at least one bit is 1.
ORR T3, T1, Z1, LSL #7 ;T3: set bits 31, 23, 15, 7.
SUB T3, T3, Z5, LSL #3 ;Subtract 40 from each digit (in T3).
ANDS T3, T3, Z1, LSL #7 ;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 40.
ADDMI T2, T2, #4 ;Add 4 to the carry if the most significant digit is >= 40.
ADD T1, T1, T3, LSL #3 ;Add 4 to the next digits when the bit is 1.
SUB T1, T1, T3, LSR #2 ;\ Subtract 40 when
SUB T1, T1, T3, LSR #4 ;/ the bit is 1.
ORR T3, T1, Z1, LSL #7 ;T3: set bits 31, 23, 15, 7.
SUB T3, T3, Z5, LSL #2 ;Subtract 20 from each digit (in T3).
ANDS T3, T3, Z1, LSL #7 ;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 20.
ADDMI T2, T2, #2 ;Add 2 to the carry if the most significant digit is >= 20.
ADD T1, T1, T3, LSL #2 ;Add 2 to the next digits when the bit is 1.
SUB T1, T1, T3, LSR #3 ;\ Subtract 20 when
SUB T1, T1, T3, LSR #5 ;/ the bit is 1.
ORR T3, T1, Z1, LSL #7 ;T3: set bits 31, 23, 15, 7.
SUB T3, T3, Z5, LSL #1 ;Subtract 10 from each digit (in T3).
ANDS T3, T3, Z1, LSL #7 ;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 10.
conv_inner3 ADDMI T2, T2, #1 ;Add 1 to the carry if the most significant digit is >= 10.
ADD T1, T1, T3, LSL #1 ;Add 1 to the next digits when the bit is 1.
SUB T1, T1, T3, LSR #4 ;\ Subtract 10 when
SUB T1, T1, T3, LSR #6 ;/ the bit is 1.
ORR T3, T1, Z1, LSL #7 ;T3: set bits 31, 23, 15, 7.
SUB T3, T3, Z5, LSL #1 ;Subtract 10 from each digit (in T3).
ANDS T3, T3, Z1, LSL #7 ;Bits 31, 23, 15, 7: 1 iff the corresponding digit is >= 10.
BNE conv_inner3 ;Branch if at least one bit is 1 (prob. 1/10).
STR T1, [RL], #4 ;Store the result.
CMP RH, TA
BCS conv_outer2 ;Loop while there still are digits.
----------------------------------------------------------------------------
6) Main program:
-- MAIN --------------------------------------------------------------------
; Register names used by the main program.
; Note that two registers carry two names each: R8 is both SL and T5,
; and R7 is both NN and T6.
SP RN 13 ;Stack pointer.
BB RN 2
TL RN 3
TH RN 4
RL RN 5
QT RN 6
NN RN 7 ;Current divisor.
SL RN 8 ;Shift count that normalizes the divisor.
T1 RN 12 ;\
T2 RN 11 ;| Temporaries; the numbering is descending,
T3 RN 10 ;| so T3 (R10) precedes T2 (R11) in LDM/STM transfers.
T4 RN 9 ;/
T5 RN 8 ;Same register as SL.
T6 RN 7 ;Same register as NN.
; start: compute the size BB of the work areas, set up the pointers and
; write the initial values of T and R.
; In: R0 = N (presumably the wanted number of digits - TODO confirm against the caller),
;     R1 = base address of the work memory.
; R0 and LR are pushed here and popped by the final LDMFD of the program;
; R0 itself is then increased by 4 for the rest of the computation.
start ADD BB, R0, R0, LSR #2 ;BB = 5 * R0 / 4. BB will be divided by 3.
ADD T1, BB, BB, LSL #2 ;\
ADD T1, T1, T1, LSL #4 ;|
ADD T1, T1, T1, LSL #8 ;| Multiply BB by &AAAAAAAB = 3^(-1) in Z/(2^32)Z.
ADD T1, T1, T1, LSL #16 ;|
ADD BB, BB, T1, LSL #1 ;/
LDR T2, fv ;T2 = &55555555.
RSBS T1, BB, T2, LSL #1 ;C clear iff BB > &AAAAAAAA.
SBCCC BB, BB, T2, LSL #1 ;If (BB >= &AAAAAAAB), BB -= &AAAAAAAB.
RSBS T1, BB, T2 ;C clear iff BB > &55555555.
SBCCC BB, BB, T2 ;If (BB >= &55555556), BB -= &55555556.
ADD BB, BB, #3 ;BB = 5 * R0 / 12 + 3.
BIC BB, BB, #3 ;BB final value.
ADD TL, R1, BB ;TL = R1 + BB.
ADD TH, TL, BB
SUB TH, TH, #4 ;TH = R1 + 2 * BB - 4.
STMFD SP!, {R0, LR} ;Save the original R0 and the return address.
ADD R0, R0, #4
ADD RL, R1, R0, LSL #1 ;RL = R1 + 2 * (N + 4).
ADD QT, RL, BB ;QT = RL + BB.
MOV NN, #3 ;Initial divisor: NN = 3.
MOV SL, #30 ;Shift count: SL = 30.
MOV T1, TL
MOV T3, RL
init_loop STR T2, [T1], #4 ;T = 4/3 = 01010101...
STR T2, [T3], #4 ;R = 4/3 = 01010101...
CMP QT, T3
BNE init_loop ;Loop until the R pointer reaches QT.
ORR T2, T2, #&80000000 ;T2 = 1101010101...
STR T2, [T3, #-4] ;R = 4 (1/2 + 1/3) = 1101010101...
; outer: one term of the series per pass.
;   1. If T is not yet zero, divide T = [TL..TH] by 9 and update TH.
;   2. Rebuild the quotient and product tables for the divisor NN << SL.
;   3. Temporarily OR a power of 2 into T, divide by NN, writing the quotient
;      to [R1..T4], then restore T.
;   4. Add the quotient to R when bit 1 of NN is clear, subtract it otherwise.
;   5. NN += 2; SL is decreased when NN << SL overflows 32 bits.
; The loop is left through "conv" once the quotient would lie entirely below R1.
; The lines of the form "X=Y, ..." are not instructions: presumably they mark
; where the corresponding routine is inserted with the given register mapping.
outer CMP TH, TL
BCC ldiv ;Branch if T = 0 (no division by 9).
LDR T2, c2 ;T2 = &38E38E39.
MOV T1, T2, LSR #1 ;T1 = &1C71C71C.
ADR T3, pm ;Pointer to the T1*i's where i is in [0..8].
STMFD SP!, {TH, SL} ;TH is used as AH and SL's register (R8 = T5) as NB below.
AL=TL, AH=TH, C1=T1, C2=T2, PM=T3, NA=T4, NB=T5, TB=LR
LDMFD SP!, {TH, SL}
LDR T1, [TH]
CMP T1, #0
SUBEQ TH, TH, #4 ;Update TH.
ldiv MOV T3, NN, LSL SL ;T3: normalized divisor.
QT=QT, DV=T3, NA=T1, NB=T2, NQ=T4
ADD QT, RL, BB ;Restore QT (which was modified by CQUOT).
ADD T4, QT, #512 ;The table of the products is placed 512 bytes after QT.
PL=T4, DV=T3, T1=T1, T2=T2
MOV T4, NN, LSR #5 ;T4: address + 4 where the most significant
SUB T4, TL, T4, LSL #2 ;word of the quotient will be stored.
CMP T4, R1 ;Branch to conversion if the address is less than
BLS conv ;the address of the least significant word.
SUB T4, T4, #4
MOV T1, #1
MOV T1, T1, ROR NN ;T1 = 2^k: number that will be added to [T4, BB].
LDR T2, [T4, BB]
STMFD SP!, {BB, TL, TH, RL, NN, T2, T4} ;Saved because the division below reuses these registers.
ORR T2, T2, T1
STR T2, [T4, BB] ;[R1+BB..T4+BB]: 1/2^n + 1/3^n.
ADD TH, T4, BB ;Upper address of the dividend.
RSB LR, SL, #32 ;LR = 32 - SL (right shift count).
ADD TL, QT, #512 ;Pointer to the table of the products.
AH=TH, BL=R1, BH=T4, SL=SL, SR=LR, DV=T3, QT=QT, PL=TL, NA=T1, NB=T2, NK=NN, NQ=BB, NT=RL
LDMFD SP!, {BB, TL, TH, RL, NN, T2, T4}
STR T2, [T4, BB] ;Restore [T4, BB].
STMFD SP!, {R1, RL} ;R1 and RL are used as running pointers below.
TST NN, #2
BNE subtract ;Bit 1 of NN set: subtract, otherwise add.
MOV T3, #0 ;The quotient will be added to the result.
add_loop1 MOVS T3, T3, LSR #1 ;C = carry saved from the previous word.
LDR T1, [R1], #4 ;Next word of the quotient.
LDR T2, [RL] ;Corresponding word of the result.
ADCS T2, T2, T1
STR T2, [RL], #4
ADC T3, T3, T3 ;Save the carry in T3 (the CMP below destroys C).
CMP T4, R1
BCS add_loop1 ;Loop while R1 <= T4.
MOVS T3, T3, LSR #1 ;C = last carry.
BCC addsub_end
add_loop2 LDR T2, [RL] ;Propagate the carry through the next words of the result.
ADDS T2, T2, #1
STR T2, [RL], #4
BCC addsub_end
B add_loop2
subtract MOV T3, #1 ;The quotient will be subtracted from the result.
sub_loop1 MOVS T3, T3, LSR #1 ;C = saved "no borrow" flag (1 at the beginning).
sub_loop2 LDR T1, [R1], #4
LDR T2, [RL]
SBCS T2, T2, T1
STR T2, [RL], #4
ADC T3, T3, T3 ;Save C in T3 (the CMP below destroys C).
CMP T4, R1
BCS sub_loop1 ;Loop while R1 <= T4.
MOVS T3, T3, LSR #1 ;C = last "no borrow" flag.
BCC sub_loop2 ;Borrow pending: continue with the next word.
; NOTE(review): while a borrow is pending, sub_loop2 keeps reading [R1] beyond
; T4 and subtracts that word too - this relies on those words being 0; confirm.
addsub_end LDMFD SP!, {R1, RL}
ADD NN, NN, #2 ;Next divisor.
MOVS T3, NN, LSL SL ;C = bit shifted out of NN by the normalization.
BCC outer
SUB SL, SL, #1 ;SL = new shift count.
B outer
; conv: prepare the interleaved decimal work area, run the base 2 --> base 10
; conversion and return.
; The area starting at R1 is filled with pairs of words (&FFFFFFFF, 0): the
; first word of each pair belongs to T and the second to R, which gives the
; initial values required by the conversion. The line of the form "X=Y, ..."
; is not an instruction: presumably it marks where the conversion routine is
; inserted with the given register mapping.
conv MOV TL, R1
ADD RL, R1, R0, LSL #1
SUB TH, RL, #4
MOV T1, R1
MOV T2, #0 ;Initial value of the words of R.
MVN T3, #0 ;Initial value of the words of T: &FFFFFFFF.
ff_loop STMIA T1!, {T3, T2} ;T3 (= R10) stored before T2 (= R11).
CMP T1, TH
BLS ff_loop ;Loop while T1 <= TH.
LDR T6, z1 ;T6 = &01010101.
AH=QT, TL=TL, TH=TH, RL=R1, RH=RL, Z1=T6, T1=T1, T2=T2, T3=T3, T4=T4, Z5=T5, TA=LR, TT=T5, CT=BB, TK=T4
LDMFD SP!, {R0, PC} ;Restore R0 and return (pops the R0 and LR pushed at "start").
; Constants.
z1 DCD &01010101 ;One bit per byte; shifted to build the per-digit masks.
fv DCD &55555555
c2 DCD &38E38E39 ;Inverse of 9 modulo 2^32.
pm DCD 0,&1C71C71C,&38E38E38,&55555554,&71C71C70,&8E38E38C,&AAAAAAA8,&C71C71C4,&E38E38E0 ;i * &1C71C71C for i = 0..8.
----------------------------------------------------------------------------