ARM assembly language: Use of the floating point unit

This demonstrates simple use of the floating-point unit and the printing of floating-point values - it works on the Raspberry Pi.

@-----------------------------------------------------------------------
@ fp_mul.s - multiply pi by e on the VFP unit and print the operands and
@            the product with printf.
@
@ Syntax: GNU as, ARM (A32) state, pre-UAL VFP mnemonics (flds, fcvtds, ...).
@ Entry:  main, called from the C runtime (the file is linked with gcc).
@ Return: r0 = 0.
@
@ printf is variadic, so floating point arguments travel in core registers
@ and on the stack rather than in VFP registers:
@   r0        = address of the format string
@   r2:r3     = first double (r1 is skipped so the double starts on an
@               even-numbered register)
@   [sp], ... = any further doubles
@
@ Register roles inside main:
@   s14, s15  = single precision scratch (only live before the first call)
@   d8        = (double) pi
@   d9        = (double) e
@   d10       = (double) (pi * e)
@ d8-d10 are callee-saved, so the values survive the calls to printf;
@ d0-d7 are caller-saved and may be overwritten by any call.
@-----------------------------------------------------------------------
.section .rodata
string:
 .asciz "pi (%f) times e (%f) is %f\n"
string2:
 .asciz "float (%f)\n"
.text
.global main
.type main, %function
main:
 stmfd sp!, {fp, lr}     @ save the frame pointer (r11) and link register (r14)
 fstmfdd sp!, {d8-d10}   @ save the callee-saved VFP registers we are about to use
 sub sp, sp, #16         @ 16 bytes for the two doubles passed to printf on the stack
                         @ (8 + 24 + 16 = 48 bytes pushed, so sp stays 8-byte aligned)

 flds s14, const_pi      @ s14 = pi, loaded PC-relative straight from the literal below
 flds s15, const_e       @ s15 = e
 fcvtds d8, s14          @ d8 = pi widened to double precision
 fcvtds d9, s15          @ d9 = e widened to double precision

 fmuls s15, s14, s15     @ s15 = pi * e (single precision multiply)
 fcvtds d10, s15         @ d10 = product widened to double precision

 fstd d9, [sp]           @ second double argument (e) goes on the stack
 fstd d10, [sp, #8]      @ third double argument (pi * e) goes on the stack
 ldr r0, =string         @ r0 = address of the three-value format string
 fmrrd r2, r3, d8        @ r2 = low word of d8, r3 = high word of d8 (first double argument)
 bl printf               @ print pi, e and their product

 ldr r0, =string2
 fmrrd r2, r3, d8
 bl printf               @ print pi

 ldr r0, =string2
 fmrrd r2, r3, d9
 bl printf               @ print e

 ldr r0, =string2
 fmrrd r2, r3, d10
 bl printf               @ print the product pi * e

 mov r0, #0              @ return 0 from main (r0 would otherwise hold printf's result)
 add sp, sp, #16         @ discard the outgoing argument area
 fldmfdd sp!, {d8-d10}   @ restore the callee-saved VFP registers
 ldmfd sp!, {fp, pc}     @ restore fp and return by loading the saved lr into pc

 .align 2
const_pi:
 .float 3.1415926
const_e:
 .float 2.718281

bob@sweden:~/src/asm$ gcc -gstabs -o fp_mul fp_mul.s
bob@sweden:~/src/asm$ ./fp_mul
pi (3.141593) times e (2.718281) is 8.539731
float (3.141593)
float (2.718281)
float (8.539731)
bob@sweden:~/src/asm$

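Because printf is a variadic function, its floating-point arguments are passed as doubles in the integer registers and on the stack: r0 holds the address of the format string, the first double-precision number is passed in the two integer registers r2 and r3 (r1 is skipped so that the double starts on an even-numbered register), and any subsequent double-precision numbers are placed on the stack. This program demonstrates this usage.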

Using gdb can show more details

(gdb) b main
Breakpoint 1 at 0x83d0: file fp_mul.s, line 11.
(gdb) run
Starting program: /home/bob/src/asm/fp_mul 

Breakpoint 1, main () at fp_mul.s:11
11  sub sp, sp, #16
(gdb) i r
r0             0x1 1
r1             0xbefffd24 3204447524
r2             0xbefffd2c 3204447532
r3             0x83cc 33740
r4             0x0 0
r5             0x0 0
r6             0x8320 33568
r7             0x0 0
r8             0x0 0
r9             0x0 0
r10            0xb6fff000 3070226432
r11            0x0 0
r12            0xb6fb5000 3069923328
sp             0xbefffbd0 0xbefffbd0
lr             0xb6ea181c -1226172388
pc             0x83d0 0x83d0 
cpsr           0x60000010 1610612752
(gdb) s
13  ldr r3, const_pi
(gdb) i r r3
r3             0x83cc 33740
(gdb) s
14  str r3, [r1]
(gdb) i r r3
r3             0x40490fda 1078530010
(gdb) i r r1
r1             0xbefffd24 3204447524
(gdb) s
15  flds s14, [r1]
(gdb) s
16  fcvtds d5, s14
(gdb) i r s14
s14            3.1415925 (raw 0x40490fda)
(gdb) x/4x const_pi
0x8448 : 0xda 0x0f 0x49 0x40
(gdb) i r r1
r1             0xbefffd24 3204447524
(gdb) i r r3
r3             0x40490fda 1078530010
(gdb) x/4x 0xbefffd24
0xbefffd24: 0xda 0x0f 0x49 0x40
(gdb) s
18  ldr r3, const_e
(gdb) 
19  str r3, [r2]
(gdb) 
20  flds s15, [r2]
(gdb) 
21  fcvtds d6, s15
(gdb) 
23  fmuls s15, s14, s15
(gdb) i r s14
s14            3.1415925 (raw 0x40490fda)
(gdb) i r s15
s15            2.71828103 (raw 0x402df851)
(gdb) s
24  fcvtds d7, s15
(gdb) i r s15
s15            8.53973103 (raw 0x4108a2bd)
(gdb) s
26  fstd d6, [sp]
(gdb) i r d7
d7             8.5397310256958008 (raw 0x40211457a0000000)
(gdb) s
27  fstd d7, [sp, #8]
(gdb) s
28  ldr r0, =string
(gdb) s
29  fmrrd r2, r3, d5
(gdb) s
30  bl printf
(gdb) i r r2
r2             0x40000000 1073741824
(gdb) i r r3
r3             0x400921fb 1074340347
(gdb) i r d5
d5             3.1415925025939941 (raw 0x400921fb40000000)
(gdb)

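I do think the code would be more robust if we defined some storage space of our own instead of storing through the pointers in r1 and r2, as the `str r3, [r1]` and `str r3, [r2]` instructions in the gdb session above do - if these registers were null when we entered the routine we would dereference null pointers, which is never good. (Here main is called from the C runtime, so r1 and r2 presumably hold argv and envp, and the stores overwrite argv[0] and envp[0].) The registers r0-r3 are specified as temporary (caller-saved) registers by the calling convention, so are we at the mercy of whatever the previous call left in them?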

ARM assembly language

Saturday 22 June 2013

Use of the floating point unit

No comments:

Post a Comment