From f013ad57ef7fea6fd6a07ef136fb63f02eb24b28 Mon Sep 17 00:00:00 2001
From: "monty@mashka.mysql.fi" <>
Date: Thu, 31 Oct 2002 11:27:52 +0200
Subject: [PATCH] 30 % faster longlong10_to_str function

---
 strings/longlong2str-x86.s | 107 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 100 insertions(+), 7 deletions(-)

diff --git a/strings/longlong2str-x86.s b/strings/longlong2str-x86.s
index 98e60acbafb..3bfd1777e18 100644
--- a/strings/longlong2str-x86.s
+++ b/strings/longlong2str-x86.s
@@ -64,7 +64,7 @@ longlong2str:
 	jne .L150
 	movb $48,(%edi)
 	incl %edi
-	jmp .L164
+	jmp .L10_end
 	.align 4
 
 .L150:
@@ -81,9 +81,9 @@ longlong2str:
 	movl %eax,%ebp
 	movl %esi,%eax
 	divl %ebx
+	decl %ecx
 	movl %eax,%esi		# quotent in ebp:esi
 	movb _dig_vec(%edx),%al   # al is faster than dl
-	decl %ecx
 	movb %al,(%ecx)		# store value in buff
 	.align 4
 .L155:
@@ -91,7 +91,7 @@ longlong2str:
 	ja .L153
 	testl %esi,%esi		# rest value
 	jl .L153
-	je .L160		# Ready
+	je .L10_mov		# Ready
 	movl %esi,%eax
 	movl $_dig_vec,%ebp
 	.align 4
@@ -105,14 +105,14 @@ longlong2str:
 	movb %dl,(%ecx)
 	jne .L154
 
-.L160:
+.L10_mov:
 	movl %ecx,%esi
 	leal 92(%esp),%ecx	# End of buffer
 	subl %esi,%ecx
 	rep
 	movsb
 
-.L164:
+.L10_end:
 	movl %edi,%eax		# Pointer to end null
 	movb $0,(%edi)		# Store the end null
 
@@ -131,10 +131,103 @@ longlong2str:
 .Lfe3:
 	.size longlong2str,.Lfe3-longlong2str
 
+#
+# longlong10_to_str: convert a 64 bit integer to a decimal string.
+#
+# This is almost equal to longlong2str above, except that the final
+# loop (used once the remaining value fits in 31 bits) is done much
+# more efficiently: it multiplies by a reciprocal instead of dividing.
+#
+# Arguments on the stack at entry:
+#    4(%esp)	Lower part of val
+#    8(%esp)	Higher part of val
+#   12(%esp)	dst
+#   16(%esp)	Radix; 10 (val is unsigned) or -10 (val is signed)
+# Returns in %eax a pointer to the end null stored in dst.
+#
+
+	.align 4
+.Ltmp:
+	.long 0xcccccccd	# ceil(2^35 / 10); (x * this) >> 35 == x / 10
+	.align 4
+
 	.globl longlong10_to_str
-	.type longlong10_str,@function
+	.type longlong10_to_str,@function
 longlong10_to_str:
-	jmp longlong2str
+	subl $80,%esp		# Stack space that holds the digit buffer
+	pushl %ebp
+	pushl %esi
+	pushl %edi
+	pushl %ebx
+	movl 100(%esp),%esi	# Lower part of val
+	movl 104(%esp),%ebp	# Higher part of val
+	movl 108(%esp),%edi	# get dst
+	movl 112(%esp),%ebx	# Radix (10 or -10)
+	testl %ebx,%ebx
+	jge .L10_10		# Positive radix
+
+	negl %ebx		# Change radix to positive (= 10)
+
+	testl %ebp,%ebp		# Test if negative value
+	jge .L10_10
+	movb $45,(%edi)		# Add sign
+	incl %edi
+	negl %esi		# Change sign of val (ebp:esi)
+	adcl $0,%ebp		# Propagate borrow from the low part
+	negl %ebp
+	.align 4
+
+.L10_10:
+	leal 92(%esp),%ecx	# End of buffer
+	movl %esi,%eax		# Test if zero (for easy loop)
+	orl %ebp,%eax
+	jne .L10_30		# Not zero
+
+	# Here when value is zero
+	movb $48,(%edi)
+	incl %edi
+	jmp .L10_end
+	.align 4
+
+.L10_20:
+	# val is stored in ebp:esi
+	movl %ebp,%eax		# High part of value
+	xorl %edx,%edx
+	divl %ebx		# Divide by 10
+	movl %eax,%ebp
+	movl %esi,%eax
+	divl %ebx		# Divide by 10
+	decl %ecx
+	movl %eax,%esi		# quotient in ebp:esi
+	addl $48,%edx		# Convert to ascii
+	movb %dl,(%ecx)		# store value in buff
+
+.L10_30:
+	testl %ebp,%ebp
+	ja .L10_20		# High part not zero (testl clears CF, so ja == jne)
+	testl %esi,%esi		# rest value
+	jl .L10_20		# Unsigned, do ulonglong div once more
+	je .L10_mov		# Ready
+	movl %esi,%ebx		# Move val to %ebx
+
+	# The following code uses some tricks to change division by 10 to
+	# multiplication and shifts
+	movl .Ltmp,%esi		# set %esi to 0xcccccccd
+
+.L10_40:
+	movl %ebx,%eax
+	mull %esi		# %edx:%eax = val * 0xcccccccd
+	decl %ecx
+	shrl $3,%edx		# %edx = val / 10 (product >> 35)
+	leal (%edx,%edx,4),%eax	# %eax = 5 * (val / 10)
+	addl %eax,%eax		# %eax = 10 * (val / 10)
+	subb %al,%bl		# %bl now contains val % 10
+	addb $48,%bl		# Convert to ascii
+	movb %bl,(%ecx)		# store value in buff
+	movl %edx,%ebx		# val = val / 10
+	testl %ebx,%ebx
+	jne .L10_40		# More digits left
+	jmp .L10_mov		# Shared end with longlong2str
 
 .L10end:
 	.size longlong10_to_str,.L10end-longlong10_to_str