Saturday 22 June 2013

Use of the floating point unit

This demonstrates simple use of the floating-point unit and the printing of floating-point values; it works on the Raspberry Pi.

.section .rodata
@ Read-only, NUL-terminated printf format strings referenced from main.
@ Summary line: three %f conversions (pi, e, and their product).
string:
 .asciz "pi (%f) times e (%f) is %f\n"
@ Single-value line: one %f conversion; main reuses it for each value in turn.
string2:
 .asciz "float (%f)\n"
.text
.global main
.type main, %function
@ ----------------------------------------------------------------------
@ int main(void)
@
@ Multiplies two single-precision constants (const_pi, const_e) on the
@ VFP unit, then prints pi, e and their product with printf, first on
@ one line and then one value per line.
@
@ ABI:    ARM AAPCS. printf is variadic, so every %f argument is passed
@         as a double in the core registers / on the stack:
@           r0        = format string
@           r1        = skipped (a 64-bit argument starts in an even register)
@           r2:r3     = first double
@           [sp]...   = any further doubles
@ Out:    r0 = 0
@ Clobb:  r0-r3, d5-d7 (s14/s15 alias d7), plus whatever printf clobbers
@
@ Stack frame after the prologue (sp is 8-byte aligned, 48 bytes total):
@   [sp, #0]   outgoing printf argument: 2nd double
@   [sp, #8]   outgoing printf argument: 3rd double
@   [sp, #16]  local: pi      as double
@   [sp, #24]  local: e       as double
@   [sp, #32]  local: pi * e  as double
@   [sp, #40]  saved fp, lr
@
@ The locals exist because d0-d7 are caller-saved: printf is free to
@ overwrite them, so values needed after a call are reloaded from memory.
@ ----------------------------------------------------------------------
main:
 stmfd sp!, {fp, lr}    @ save fp (r11) and lr (r14); two words keep sp 8-byte aligned
 sub sp, sp, #40        @ 16 bytes of outgoing args + 24 bytes for three double locals

 adr r1, const_pi       @ r1 = address of const_pi (PC-relative, same section)
 flds s14, [r1]         @ s14 = pi (single precision)
 fcvtds d5, s14         @ d5 = (double) pi

 adr r1, const_e        @ r1 = address of const_e
 flds s15, [r1]         @ s15 = e (single precision)
 fcvtds d6, s15         @ d6 = (double) e

 fmuls s15, s14, s15    @ s15 = pi * e (single precision)
 fcvtds d7, s15         @ d7 = (double)(pi * e); this overwrites s14/s15, which alias d7

 fstd d5, [sp, #16]     @ keep private copies of all three doubles so they
 fstd d6, [sp, #24]     @ can be reloaded after printf has had its way with
 fstd d7, [sp, #32]     @ the caller-saved VFP registers

 fstd d6, [sp]          @ 2nd double argument (e) goes on the stack
 fstd d7, [sp, #8]      @ 3rd double argument (pi * e) goes on the stack
 ldr r0, =string        @ r0 = format string with three %f conversions
 fmrrd r2, r3, d5       @ 1st double argument (pi): low word -> r2, high word -> r3
 bl printf              @ print all three values on one line

 ldr r0, =string2
 ldr r2, [sp, #16]      @ r2:r3 = pi, reloaded from the local copy
 ldr r3, [sp, #20]
 bl printf              @ print const_pi

 ldr r0, =string2
 ldr r2, [sp, #24]      @ r2:r3 = e
 ldr r3, [sp, #28]
 bl printf              @ print const_e

 ldr r0, =string2
 ldr r2, [sp, #32]      @ r2:r3 = pi * e
 ldr r3, [sp, #36]
 bl printf              @ print the product of pi * e

 mov r0, #0             @ return 0 (otherwise printf's character count would be returned)
 add sp, sp, #40        @ discard the outgoing-argument area and the locals
 ldmfd sp!, {fp, pc}    @ restore fp and return by popping the saved lr into pc
.size main, .-main

 .align 2               @ on ARM, .align takes a power of two: 2 -> 4-byte alignment
@ Single-precision (32-bit) constants read by main. No section directive
@ follows main's .text, so they sit next to the code and can be addressed
@ PC-relatively by label.
const_pi:
 .float 3.1415926       @ approximation of pi
const_e:
 .float 2.718281        @ approximation of e
bob@sweden:~/src/asm$ gcc -gstabs -o fp_mul fp_mul.s
bob@sweden:~/src/asm$ ./fp_mul
pi (3.141593) times e (2.718281) is 8.539731
float (3.141593)
float (2.718281)
float (8.539731)
bob@sweden:~/src/asm$

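printf is a variadic function, so its floating-point arguments are passed as double precision values in the integer registers and on the stack rather than in VFP registers. With the format string in r0, the first double must start in an even-numbered register, so it is placed in the pair r2 and r3 (r1 is skipped), and any subsequent double precision numbers are pushed on to the stack. This program demonstrates this usage.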

Using gdb can show more details

(gdb) b main
Breakpoint 1 at 0x83d0: file fp_mul.s, line 11.
(gdb) run
Starting program: /home/bob/src/asm/fp_mul 

Breakpoint 1, main () at fp_mul.s:11
11  sub sp, sp, #16
(gdb) i r
r0             0x1 1
r1             0xbefffd24 3204447524
r2             0xbefffd2c 3204447532
r3             0x83cc 33740
r4             0x0 0
r5             0x0 0
r6             0x8320 33568
r7             0x0 0
r8             0x0 0
r9             0x0 0
r10            0xb6fff000 3070226432
r11            0x0 0
r12            0xb6fb5000 3069923328
sp             0xbefffbd0 0xbefffbd0
lr             0xb6ea181c -1226172388
pc             0x83d0 0x83d0 
cpsr           0x60000010 1610612752
(gdb) s
13  ldr r3, const_pi
(gdb) i r r3
r3             0x83cc 33740
(gdb) s
14  str r3, [r1]
(gdb) i r r3
r3             0x40490fda 1078530010
(gdb) i r r1
r1             0xbefffd24 3204447524
(gdb) s
15  flds s14, [r1]
(gdb) s
16  fcvtds d5, s14
(gdb) i r s14
s14            3.1415925 (raw 0x40490fda)
(gdb) x/4x const_pi
0x8448 : 0xda 0x0f 0x49 0x40
(gdb) i r r1
r1             0xbefffd24 3204447524
(gdb) i r r3
r3             0x40490fda 1078530010
(gdb) x/4x 0xbefffd24
0xbefffd24: 0xda 0x0f 0x49 0x40
(gdb) s
18  ldr r3, const_e
(gdb) 
19  str r3, [r2]
(gdb) 
20  flds s15, [r2]
(gdb) 
21  fcvtds d6, s15
(gdb) 
23  fmuls s15, s14, s15
(gdb) i r s14
s14            3.1415925 (raw 0x40490fda)
(gdb) i r s15
s15            2.71828103 (raw 0x402df851)
(gdb) s
24  fcvtds d7, s15
(gdb) i r s15
s15            8.53973103 (raw 0x4108a2bd)
(gdb) s
26  fstd d6, [sp]
(gdb) i r d7
d7             8.5397310256958008 (raw 0x40211457a0000000)
(gdb) s
27  fstd d7, [sp, #8]
(gdb) s
28  ldr r0, =string
(gdb) s
29  fmrrd r2, r3, d5
(gdb) s
30  bl printf
(gdb) i r r2
r2             0x40000000 1073741824
(gdb) i r r3
r3             0x400921fb 1074340347
(gdb) i r d5
d5             3.1415925025939941 (raw 0x400921fb40000000)
(gdb)

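I do think that the code would be more robust if we defined some storage space of our own instead of storing through the pointers in r1 and r2 - if these were null when we entered the routine we would dereference null pointers, which is never good. The registers r0-r3 are specified as temporary (argument/scratch) registers by the calling convention, so we are at the mercy of whatever the caller left in them. In the gdb session above r0 is 1 and r1 and r2 hold stack addresses on entry, which is consistent with them being main's own arguments (argc, argv and, presumably, envp) - in which case the two str instructions overwrite the first entries of those arrays rather than scratch memory that belongs to us.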

No comments:

Post a Comment