This demonstrates the simple use of the floating-point unit and the printing of floating-point values - works on the Raspberry Pi
.section .rodata
@ NUL-terminated printf format strings; main loads their addresses into r0.
string:  .asciz "pi (%f) times e (%f) is %f\n"    @ takes three doubles
string2: .asciz "float (%f)\n"                     @ takes one double
.text
.global main
.type main, %function
@-----------------------------------------------------------------------
@ int main(void)
@
@ Loads two single-precision constants (pi and e), multiplies them on the
@ VFP, widens all three values to double precision and prints them with
@ printf: once all together, then one per line.
@
@ Argument passing used for printf (variadic, so core registers + stack):
@   r0      = address of the format string
@   r2:r3   = first double (r1 is skipped so the pair starts on an even reg)
@   [sp]    = second double, [sp, #8] = third double
@
@ Register roles:
@   s14, s15 = single-precision scratch (pi, e, then pi*e in s15)
@   d8       = (double) pi      \
@   d9       = (double) e        > callee-saved, so they survive bl printf
@   d10      = (double) pi * e  /
@
@ Stack: 8 (fp, lr) + 24 (d8-d10) + 16 (outgoing args) = 48 bytes, which
@ keeps sp 8-byte aligned at every call.
@ Returns 0 in r0.
@-----------------------------------------------------------------------
main:
        stmfd   sp!, {fp, lr}       @ save frame pointer (r11) and return address (r14)
        fstmfdd sp!, {d8-d10}       @ save the callee-saved VFP registers we use
        sub     sp, sp, #16         @ room for two doubles passed to printf on the stack

        flds    s14, const_pi       @ s14 = pi, loaded PC-relative from the constant below
        flds    s15, const_e        @ s15 = e
        fcvtds  d8, s14             @ d8  = (double) pi
        fcvtds  d9, s15             @ d9  = (double) e
        fmuls   s15, s14, s15       @ s15 = pi * e (single precision)
        fcvtds  d10, s15            @ d10 = (double) (pi * e)

        fstd    d9, [sp]            @ 2nd double argument: e
        fstd    d10, [sp, #8]       @ 3rd double argument: pi * e
        ldr     r0, =string         @ r0 = address of the three-value format string
        fmrrd   r2, r3, d8          @ 1st double argument: r2 = low word, r3 = high word of pi
        bl      printf              @ print all three values

        ldr     r0, =string2
        fmrrd   r2, r3, d8
        bl      printf              @ print pi

        ldr     r0, =string2
        fmrrd   r2, r3, d9
        bl      printf              @ print e

        ldr     r0, =string2
        fmrrd   r2, r3, d10
        bl      printf              @ print pi * e

        mov     r0, #0              @ return 0 instead of printf's result
        add     sp, sp, #16         @ discard the outgoing argument area
        fldmfdd sp!, {d8-d10}       @ restore the callee-saved VFP registers
        ldmfd   sp!, {fp, pc}       @ restore fp and return (saved lr is popped into pc)
        .size   main, .-main

        .align 2
const_pi:
        .float 3.1415926
const_e:
        .float 2.718281
bob@sweden:~/src/asm$ gcc -gstabs -o fp_mul fp_mul.s
bob@sweden:~/src/asm$ ./fp_mul
pi (3.141593) times e (2.718281) is 8.539731
float (3.141593)
float (2.718281)
float (8.539731)
bob@sweden:~/src/asm$
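printf requires that the first double-precision number is passed in the two integer registers r2 and r3 (r0 holds the format string, and a 64-bit argument must start in an even-numbered register, so r1 is skipped), and that any subsequent double-precision numbers are pushed onto the stack. This program demonstrates this usage.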
Using gdb can show more details
(gdb) b main
Breakpoint 1 at 0x83d0: file fp_mul.s, line 11.
(gdb) run
Starting program: /home/bob/src/asm/fp_mul
Breakpoint 1, main () at fp_mul.s:11
11 sub sp, sp, #16
(gdb) i r
r0 0x1 1
r1 0xbefffd24 3204447524
r2 0xbefffd2c 3204447532
r3 0x83cc 33740
r4 0x0 0
r5 0x0 0
r6 0x8320 33568
r7 0x0 0
r8 0x0 0
r9 0x0 0
r10 0xb6fff000 3070226432
r11 0x0 0
r12 0xb6fb5000 3069923328
sp 0xbefffbd0 0xbefffbd0
lr 0xb6ea181c -1226172388
pc 0x83d0 0x83d0
cpsr 0x60000010 1610612752
(gdb) s
13 ldr r3, const_pi
(gdb) i r r3
r3 0x83cc 33740
(gdb) s
14 str r3, [r1]
(gdb) i r r3
r3 0x40490fda 1078530010
(gdb) i r r1
r1 0xbefffd24 3204447524
(gdb) s
15 flds s14, [r1]
(gdb) s
16 fcvtds d5, s14
(gdb) i r s14
s14 3.1415925 (raw 0x40490fda)
(gdb) x/4x const_pi
0x8448 : 0xda 0x0f 0x49 0x40
(gdb) i r r1
r1 0xbefffd24 3204447524
(gdb) i r r3
r3 0x40490fda 1078530010
(gdb) x/4x 0xbefffd24
0xbefffd24: 0xda 0x0f 0x49 0x40
(gdb) s
18 ldr r3, const_e
(gdb)
19 str r3, [r2]
(gdb)
20 flds s15, [r2]
(gdb)
21 fcvtds d6, s15
(gdb)
23 fmuls s15, s14, s15
(gdb) i r s14
s14 3.1415925 (raw 0x40490fda)
(gdb) i r s15
s15 2.71828103 (raw 0x402df851)
(gdb) s
24 fcvtds d7, s15
(gdb) i r s15
s15 8.53973103 (raw 0x4108a2bd)
(gdb) s
26 fstd d6, [sp]
(gdb) i r d7
d7 8.5397310256958008 (raw 0x40211457a0000000)
(gdb) s
27 fstd d7, [sp, #8]
(gdb) s
28 ldr r0, =string
(gdb) s
29 fmrrd r2, r3, d5
(gdb) s
30 bl printf
(gdb) i r r2
r2 0x40000000 1073741824
(gdb) i r r3
r3 0x400921fb 1074340347
(gdb) i r d5
d5 3.1415925025939941 (raw 0x400921fb40000000)
(gdb)
I do think that the code may be more robust if we were to define some storage space instead of relying on the pointers in r1 and r2 - if these were null when we entered the routine we would dereference null pointers, which is never good. The registers r0-r3 are specified as temporary registers by the calling convention, so are we at the mercy of the contents left in these registers by the previous call?
No comments:
Post a Comment