Skip to content

py/formatfloat: Fix exact int formatting on 32-bit mingw. #10267

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 5, 2024

Conversation

dlech
Copy link
Contributor

@dlech dlech commented Dec 19, 2022

When compiler optimizations are enabled with the MinGW build of GCC targeting 32-bit Windows, some tests fail because of floating-point rounding differences. For example:

print(float("1e24"))

would print

9.999999999999999e+23

instead of

1e+24

We can work around the issue by using powl() instead of pow() in mp_format_float() on affected targets.

@dlech
Copy link
Contributor Author

dlech commented Dec 19, 2022

@dpgeorge dpgeorge added the py-core Relates to py/ directory in source label Dec 19, 2022
@dlech
Copy link
Contributor Author

dlech commented Dec 19, 2022

I don't know x86 assembly, but here is the disassembly of `mp_format_float` before and after this change, to show the difference it makes.

Before (using `pow()`):
Dump of assembler code for function mp_format_float:
   0x00000000 <+0>:	push   %ebp
   0x00000001 <+1>:	mov    %esp,%ebp
   0x00000003 <+3>:	push   %edi
   0x00000004 <+4>:	push   %esi
   0x00000005 <+5>:	push   %ebx
   0x00000006 <+6>:	sub    $0x4c,%esp
   0x00000009 <+9>:	fldl   0x8(%ebp)
   0x0000000c <+12>:	cmpl   $0x7,0x14(%ebp)
   0x00000010 <+16>:	mov    0x18(%ebp),%edx
   0x00000013 <+19>:	mov    0x1c(%ebp),%ebx
   0x00000016 <+22>:	mov    0x20(%ebp),%ecx
   0x00000019 <+25>:	fstpl  -0x28(%ebp)
   0x0000001c <+28>:	ja     0x4a <mp_format_float+74>
   0x0000001e <+30>:	cmpl   $0x1,0x14(%ebp)
   0x00000022 <+34>:	jbe    0x30 <mp_format_float+48>
   0x00000024 <+36>:	mov    0x10(%ebp),%eax
   0x00000027 <+39>:	movb   $0x3f,(%eax)
   0x0000002a <+42>:	inc    %eax
   0x0000002b <+43>:	mov    %eax,0x10(%ebp)
   0x0000002e <+46>:	jmp    0x36 <mp_format_float+54>
   0x00000030 <+48>:	cmpl   $0x0,0x14(%ebp)
   0x00000034 <+52>:	je     0x3c <mp_format_float+60>
   0x00000036 <+54>:	mov    0x10(%ebp),%eax
   0x00000039 <+57>:	movb   $0x0,(%eax)
   0x0000003c <+60>:	xor    %eax,%eax
   0x0000003e <+62>:	cmpl   $0x1,0x14(%ebp)
   0x00000042 <+66>:	seta   %al
   0x00000045 <+69>:	jmp    0x5b1 <mp_format_float+1457>
   0x0000004a <+74>:	fldl   -0x28(%ebp)
   0x0000004d <+77>:	fxam   
   0x0000004f <+79>:	fstsw  %ax
   0x00000052 <+82>:	test   $0x2,%ah
   0x00000055 <+85>:	je     0x76 <mp_format_float+118>
   0x00000057 <+87>:	fxam   
   0x00000059 <+89>:	fstsw  %ax
   0x0000005c <+92>:	and    $0x4500,%ax
   0x00000060 <+96>:	cmp    $0x100,%ax
   0x00000064 <+100>:	je     0x7a <mp_format_float+122>
   0x00000066 <+102>:	mov    0x10(%ebp),%eax
   0x00000069 <+105>:	fchs   
   0x0000006b <+107>:	movb   $0x2d,(%eax)
   0x0000006e <+110>:	lea    0x1(%eax),%esi
   0x00000071 <+113>:	fstpl  -0x28(%ebp)
   0x00000074 <+116>:	jmp    0x8a <mp_format_float+138>
   0x00000076 <+118>:	fstp   %st(0)
   0x00000078 <+120>:	jmp    0x7c <mp_format_float+124>
   0x0000007a <+122>:	fstp   %st(0)
   0x0000007c <+124>:	mov    0x10(%ebp),%esi
   0x0000007f <+127>:	test   %cl,%cl
   0x00000081 <+129>:	je     0x8a <mp_format_float+138>
   0x00000083 <+131>:	mov    %esi,%eax
   0x00000085 <+133>:	lea    0x1(%esi),%esi
   0x00000088 <+136>:	mov    %cl,(%eax)
   0x0000008a <+138>:	mov    %edx,%eax
   0x0000008c <+140>:	fldl   -0x28(%ebp)
   0x0000008f <+143>:	and    $0x20,%eax
   0x00000092 <+146>:	mov    %al,-0x2a(%ebp)
   0x00000095 <+149>:	fxam   
   0x00000097 <+151>:	fstsw  %ax
   0x0000009a <+154>:	fstp   %st(0)
   0x0000009c <+156>:	and    $0x4500,%ax
   0x000000a0 <+160>:	cmp    $0x500,%ax
   0x000000a4 <+164>:	jne    0xbd <mp_format_float+189>
   0x000000a6 <+166>:	mov    -0x2a(%ebp),%bl
   0x000000a9 <+169>:	mov    -0x2a(%ebp),%cl
   0x000000ac <+172>:	lea    0x3(%esi),%eax
   0x000000af <+175>:	mov    -0x2a(%ebp),%dl
   0x000000b2 <+178>:	xor    $0x49,%ebx
   0x000000b5 <+181>:	xor    $0x4e,%ecx
   0x000000b8 <+184>:	xor    $0x46,%edx
   0x000000bb <+187>:	jmp    0xe2 <mp_format_float+226>
   0x000000bd <+189>:	fldl   -0x28(%ebp)
   0x000000c0 <+192>:	fxam   
   0x000000c2 <+194>:	fstsw  %ax
   0x000000c5 <+197>:	fstp   %st(0)
   0x000000c7 <+199>:	and    $0x4500,%ax
   0x000000cb <+203>:	cmp    $0x100,%ax
   0x000000cf <+207>:	jne    0xf2 <mp_format_float+242>
   0x000000d1 <+209>:	mov    -0x2a(%ebp),%dl
   0x000000d4 <+212>:	mov    -0x2a(%ebp),%cl
   0x000000d7 <+215>:	lea    0x3(%esi),%eax
   0x000000da <+218>:	xor    $0x4e,%edx
   0x000000dd <+221>:	xor    $0x41,%ecx
   0x000000e0 <+224>:	mov    %edx,%ebx
   0x000000e2 <+226>:	mov    %bl,(%esi)
   0x000000e4 <+228>:	mov    %cl,0x1(%esi)
   0x000000e7 <+231>:	mov    %dl,0x2(%esi)
   0x000000ea <+234>:	movb   $0x0,(%eax)
   0x000000ed <+237>:	jmp    0x5ae <mp_format_float+1454>
   0x000000f2 <+242>:	mov    %edx,%eax
   0x000000f4 <+244>:	or     $0x20,%eax
   0x000000f7 <+247>:	cmp    $0x67,%al
   0x000000f9 <+249>:	mov    %al,-0x29(%ebp)
   0x000000fc <+252>:	sete   -0x2b(%ebp)
   0x00000100 <+256>:	test   %ebx,%ebx
   0x00000102 <+258>:	js     0x120 <mp_format_float+288>
   0x00000104 <+260>:	movzbl -0x2b(%ebp),%edi
   0x00000108 <+264>:	sete   %al
   0x0000010b <+267>:	and    -0x2b(%ebp),%al
   0x0000010e <+270>:	cmovne %eax,%edi
   0x00000111 <+273>:	mov    %edi,%eax
   0x00000113 <+275>:	mov    %al,-0x2b(%ebp)
   0x00000116 <+278>:	mov    $0x1,%eax
   0x0000011b <+283>:	cmovne %eax,%ebx
   0x0000011e <+286>:	jmp    0x125 <mp_format_float+293>
   0x00000120 <+288>:	mov    $0x6,%ebx
   0x00000125 <+293>:	fldz   
   0x00000127 <+295>:	fldl   -0x28(%ebp)
   0x0000012a <+298>:	mov    0x14(%ebp),%edi
   0x0000012d <+301>:	mov    %esi,%eax
   0x0000012f <+303>:	sub    0x10(%ebp),%eax
   0x00000132 <+306>:	sub    %eax,%edi
   0x00000134 <+308>:	fucomip %st(1),%st
   0x00000136 <+310>:	fstp   %st(0)
   0x00000138 <+312>:	mov    %edi,-0x30(%ebp)
   0x0000013b <+315>:	lea    -0x1(%edi),%edi
   0x0000013e <+318>:	jp     0x187 <mp_format_float+391>
   0x00000140 <+320>:	jne    0x187 <mp_format_float+391>
   0x00000142 <+322>:	cmpb   $0x66,-0x29(%ebp)
   0x00000146 <+326>:	jne    0x15e <mp_format_float+350>
   0x00000148 <+328>:	lea    0x1(%ebx),%eax
   0x0000014b <+331>:	cmp    %edi,%eax
   0x0000014d <+333>:	jl     0x155 <mp_format_float+341>
   0x0000014f <+335>:	mov    -0x30(%ebp),%ebx
   0x00000152 <+338>:	sub    $0x3,%ebx
   0x00000155 <+341>:	lea    0x1(%ebx),%ecx
   0x00000158 <+344>:	xor    %eax,%eax
   0x0000015a <+346>:	xor    %edi,%edi
   0x0000015c <+348>:	jmp    0x17a <mp_format_float+378>
   0x0000015e <+350>:	lea    0x5(%ebx),%eax
   0x00000161 <+353>:	cmp    %edi,%eax
   0x00000163 <+355>:	jl     0x16b <mp_format_float+363>
   0x00000165 <+357>:	mov    -0x30(%ebp),%ebx
   0x00000168 <+360>:	sub    $0x7,%ebx
   0x0000016b <+363>:	xor    %edi,%edi
   0x0000016d <+365>:	xor    %eax,%eax
   0x0000016f <+367>:	xor    %ecx,%ecx
   0x00000171 <+369>:	mov    $0x2b,%dl
   0x00000173 <+371>:	cmpb   $0x65,-0x29(%ebp)
   0x00000177 <+375>:	cmove  %edx,%edi
   0x0000017a <+378>:	xor    %edx,%edx
   0x0000017c <+380>:	mov    %edx,-0x30(%ebp)
   0x0000017f <+383>:	mov    %edx,-0x20(%ebp)
   0x00000182 <+386>:	jmp    0x37a <mp_format_float+890>
   0x00000187 <+391>:	fnstcw -0x1a(%ebp)
   0x0000018a <+394>:	mov    -0x24(%ebp),%edx
   0x0000018d <+397>:	fldl   0x40
   0x00000193 <+403>:	mov    %edx,%eax
   0x00000195 <+405>:	shr    $0x14,%eax
   0x00000198 <+408>:	and    $0x7ff,%eax
   0x0000019d <+413>:	sub    $0x3ff,%eax
   0x000001a2 <+418>:	mov    %eax,-0x20(%ebp)
   0x000001a5 <+421>:	mov    -0x1a(%ebp),%ax
   0x000001a9 <+425>:	fimull -0x20(%ebp)
   0x000001ac <+428>:	or     $0xc,%ah
   0x000001af <+431>:	mov    %ax,-0x1c(%ebp)
   0x000001b3 <+435>:	fldcw  -0x1c(%ebp)
   0x000001b6 <+438>:	fistpl -0x20(%ebp)
   0x000001b9 <+441>:	fldcw  -0x1a(%ebp)
   0x000001bc <+444>:	mov    -0x20(%ebp),%eax
   0x000001bf <+447>:	fldl   -0x28(%ebp)
   0x000001c2 <+450>:	fld1   
   0x000001c4 <+452>:	fcomip %st(1),%st
   0x000001c6 <+454>:	fstp   %st(0)
   0x000001c8 <+456>:	jbe    0x289 <mp_format_float+649>
   0x000001ce <+462>:	fildl  -0x20(%ebp)
   0x000001d1 <+465>:	neg    %eax
   0x000001d3 <+467>:	mov    %eax,-0x34(%ebp)
   0x000001d6 <+470>:	fstpl  0x8(%esp)
   0x000001da <+474>:	flds   0x48
   0x000001e0 <+480>:	fstpl  (%esp)
   0x000001e3 <+483>:	call   0x1e8 <mp_format_float+488>
   0x000001e8 <+488>:	mov    -0x34(%ebp),%eax
   0x000001eb <+491>:	mov    %eax,-0x20(%ebp)
   0x000001ee <+494>:	fldl   -0x28(%ebp)
   0x000001f1 <+497>:	fxch   %st(1)
   0x000001f3 <+499>:	fcomi  %st(1),%st
   0x000001f5 <+501>:	fstp   %st(1)
   0x000001f7 <+503>:	jbe    0x21d <mp_format_float+541>
   0x000001f9 <+505>:	fstp   %st(0)
   0x000001fb <+507>:	incl   -0x20(%ebp)
   0x000001fe <+510>:	mov    -0x20(%ebp),%eax
   0x00000201 <+513>:	neg    %eax
   0x00000203 <+515>:	mov    %eax,-0x34(%ebp)
   0x00000206 <+518>:	fildl  -0x34(%ebp)
   0x00000209 <+521>:	fstpl  0x8(%esp)
   0x0000020d <+525>:	flds   0x48
   0x00000213 <+531>:	fstpl  (%esp)
   0x00000216 <+534>:	call   0x21b <mp_format_float+539>
   0x0000021b <+539>:	jmp    0x1ee <mp_format_float+494>
   0x0000021d <+541>:	cmpb   $0x66,-0x29(%ebp)
   0x00000221 <+545>:	je     0x23a <mp_format_float+570>
   0x00000223 <+547>:	cmpl   $0x4,-0x20(%ebp)
   0x00000227 <+551>:	jg     0x25a <mp_format_float+602>
   0x00000229 <+553>:	cmpb   $0x0,-0x2b(%ebp)
   0x0000022d <+557>:	je     0x25a <mp_format_float+602>
   0x0000022f <+559>:	fstp   %st(0)
   0x00000231 <+561>:	mov    -0x20(%ebp),%eax
   0x00000234 <+564>:	lea    -0x1(%eax,%ebx,1),%ebx
   0x00000238 <+568>:	jmp    0x23c <mp_format_float+572>
   0x0000023a <+570>:	fstp   %st(0)
   0x0000023c <+572>:	lea    0x1(%ebx),%eax
   0x0000023f <+575>:	cmp    %edi,%eax
   0x00000241 <+577>:	jl     0x249 <mp_format_float+585>
   0x00000243 <+579>:	mov    -0x30(%ebp),%ebx
   0x00000246 <+582>:	sub    $0x3,%ebx
   0x00000249 <+585>:	xor    %edx,%edx
   0x0000024b <+587>:	lea    0x1(%ebx),%ecx
   0x0000024e <+590>:	xor    %eax,%eax
   0x00000250 <+592>:	xor    %edi,%edi
   0x00000252 <+594>:	mov    %edx,-0x30(%ebp)
   0x00000255 <+597>:	jmp    0x350 <mp_format_float+848>
   0x0000025a <+602>:	mov    -0x30(%ebp),%eax
   0x0000025d <+605>:	sub    $0x7,%eax
   0x00000260 <+608>:	cmp    %ebx,%eax
   0x00000262 <+610>:	jg     0x272 <mp_format_float+626>
   0x00000264 <+612>:	cmpb   $0x67,-0x29(%ebp)
   0x00000268 <+616>:	mov    %eax,%ebx
   0x0000026a <+618>:	je     0x272 <mp_format_float+626>
   0x0000026c <+620>:	mov    -0x30(%ebp),%ebx
   0x0000026f <+623>:	sub    $0x8,%ebx
   0x00000272 <+626>:	fdivrl -0x28(%ebp)
   0x00000275 <+629>:	mov    -0x20(%ebp),%eax
   0x00000278 <+632>:	xor    %ecx,%ecx
   0x0000027a <+634>:	mov    $0x2d,%edi
   0x0000027f <+639>:	neg    %eax
   0x00000281 <+641>:	fstpl  -0x28(%ebp)
   0x00000284 <+644>:	jmp    0x375 <mp_format_float+885>
   0x00000289 <+649>:	inc    %eax
   0x0000028a <+650>:	mov    %eax,-0x34(%ebp)
   0x0000028d <+653>:	fildl  -0x34(%ebp)
   0x00000290 <+656>:	fstpl  0x8(%esp)
   0x00000294 <+660>:	flds   0x48
   0x0000029a <+666>:	fstpl  (%esp)
   0x0000029d <+669>:	call   0x2a2 <mp_format_float+674>
   0x000002a2 <+674>:	fldl   -0x28(%ebp)
   0x000002a5 <+677>:	fcomip %st(1),%st
   0x000002a7 <+679>:	fstp   %st(0)
   0x000002a9 <+681>:	jb     0x2d8 <mp_format_float+728>
   0x000002ab <+683>:	mov    -0x20(%ebp),%eax
   0x000002ae <+686>:	inc    %eax
   0x000002af <+687>:	mov    %eax,-0x34(%ebp)
   0x000002b2 <+690>:	mov    -0x20(%ebp),%eax
   0x000002b5 <+693>:	add    $0x2,%eax
   0x000002b8 <+696>:	mov    %eax,-0x20(%ebp)
   0x000002bb <+699>:	fildl  -0x20(%ebp)
   0x000002be <+702>:	fstpl  0x8(%esp)
   0x000002c2 <+706>:	flds   0x48
   0x000002c8 <+712>:	fstpl  (%esp)
   0x000002cb <+715>:	call   0x2d0 <mp_format_float+720>
   0x000002d0 <+720>:	mov    -0x34(%ebp),%eax
   0x000002d3 <+723>:	mov    %eax,-0x20(%ebp)
   0x000002d6 <+726>:	jmp    0x2a2 <mp_format_float+674>
   0x000002d8 <+728>:	cmpb   $0x66,-0x29(%ebp)
   0x000002dc <+732>:	jne    0x303 <mp_format_float+771>
   0x000002de <+734>:	cmp    %edi,-0x20(%ebp)
   0x000002e1 <+737>:	jge    0x311 <mp_format_float+785>
   0x000002e3 <+739>:	mov    -0x20(%ebp),%eax
   0x000002e6 <+742>:	lea    0x1(%eax,%ebx,1),%eax
   0x000002ea <+746>:	cmp    %edi,%eax
   0x000002ec <+748>:	jl     0x344 <mp_format_float+836>
   0x000002ee <+750>:	mov    %edi,%eax
   0x000002f0 <+752>:	mov    -0x20(%ebp),%edi
   0x000002f3 <+755>:	sub    %edi,%eax
   0x000002f5 <+757>:	lea    -0x2(%eax),%ebx
   0x000002f8 <+760>:	dec    %eax
   0x000002f9 <+761>:	mov    $0x0,%eax
   0x000002fe <+766>:	cmove  %eax,%ebx
   0x00000301 <+769>:	jmp    0x344 <mp_format_float+836>
   0x00000303 <+771>:	cmpb   $0x65,-0x29(%ebp)
   0x00000307 <+775>:	je     0x311 <mp_format_float+785>
   0x00000309 <+777>:	cmpb   $0x67,-0x29(%ebp)
   0x0000030d <+781>:	je     0x323 <mp_format_float+803>
   0x0000030f <+783>:	jmp    0x36b <mp_format_float+875>
   0x00000311 <+785>:	mov    -0x30(%ebp),%eax
   0x00000314 <+788>:	sub    $0x7,%eax
   0x00000317 <+791>:	cmp    %ebx,%eax
   0x00000319 <+793>:	jg     0x356 <mp_format_float+854>
   0x0000031b <+795>:	mov    -0x30(%ebp),%ebx
   0x0000031e <+798>:	sub    $0x8,%ebx
   0x00000321 <+801>:	jmp    0x356 <mp_format_float+854>
   0x00000323 <+803>:	lea    0x5(%ebx),%eax
   0x00000326 <+806>:	cmp    %edi,%eax
   0x00000328 <+808>:	jl     0x337 <mp_format_float+823>
   0x0000032a <+810>:	mov    -0x30(%ebp),%ebx
   0x0000032d <+813>:	sub    $0x7,%ebx
   0x00000330 <+816>:	cmp    %ebx,-0x20(%ebp)
   0x00000333 <+819>:	jl     0x33e <mp_format_float+830>
   0x00000335 <+821>:	jmp    0x36b <mp_format_float+875>
   0x00000337 <+823>:	mov    -0x20(%ebp),%eax
   0x0000033a <+826>:	cmp    %eax,%ebx
   0x0000033c <+828>:	jle    0x36b <mp_format_float+875>
   0x0000033e <+830>:	mov    -0x20(%ebp),%eax
   0x00000341 <+833>:	inc    %eax
   0x00000342 <+834>:	sub    %eax,%ebx
   0x00000344 <+836>:	mov    -0x20(%ebp),%eax
   0x00000347 <+839>:	xor    %edi,%edi
   0x00000349 <+841>:	mov    %eax,-0x30(%ebp)
   0x0000034c <+844>:	lea    0x1(%eax,%ebx,1),%ecx
   0x00000350 <+848>:	movb   $0x66,-0x29(%ebp)
   0x00000354 <+852>:	jmp    0x37a <mp_format_float+890>
   0x00000356 <+854>:	xor    %edx,%edx
   0x00000358 <+856>:	movb   $0x65,-0x29(%ebp)
   0x0000035c <+860>:	mov    -0x20(%ebp),%eax
   0x0000035f <+863>:	xor    %ecx,%ecx
   0x00000361 <+865>:	mov    %edx,-0x30(%ebp)
   0x00000364 <+868>:	mov    $0x2b,%edi
   0x00000369 <+873>:	jmp    0x37a <mp_format_float+890>
   0x0000036b <+875>:	mov    -0x20(%ebp),%eax
   0x0000036e <+878>:	xor    %ecx,%ecx
   0x00000370 <+880>:	mov    $0x2b,%edi
   0x00000375 <+885>:	xor    %edx,%edx
   0x00000377 <+887>:	mov    %edx,-0x30(%ebp)
   0x0000037a <+890>:	xor    %edx,%edx
   0x0000037c <+892>:	test   %ebx,%ebx
   0x0000037e <+894>:	cmovns %ebx,%edx
   0x00000381 <+897>:	cmpb   $0x65,-0x29(%ebp)
   0x00000385 <+901>:	mov    %edx,-0x38(%ebp)
   0x00000388 <+904>:	jne    0x38f <mp_format_float+911>
   0x0000038a <+906>:	mov    %edx,%ecx
   0x0000038c <+908>:	inc    %ecx
   0x0000038d <+909>:	jmp    0x3a3 <mp_format_float+931>
   0x0000038f <+911>:	cmpb   $0x67,-0x29(%ebp)
   0x00000393 <+915>:	jne    0x3a3 <mp_format_float+931>
   0x00000395 <+917>:	test   %ebx,%ebx
   0x00000397 <+919>:	mov    $0x1,%ecx
   0x0000039c <+924>:	cmovg  -0x38(%ebp),%ecx
   0x000003a0 <+928>:	mov    %ecx,-0x38(%ebp)
   0x000003a3 <+931>:	xor    %ebx,%ebx
   0x000003a5 <+933>:	mov    %eax,-0x3c(%ebp)
   0x000003a8 <+936>:	mov    %ebx,-0x34(%ebp)
   0x000003ab <+939>:	test   %ecx,%ecx
   0x000003ad <+941>:	js     0x447 <mp_format_float+1095>
   0x000003b3 <+947>:	cmpl   $0x0,-0x3c(%ebp)
   0x000003b7 <+951>:	fld1   
   0x000003b9 <+953>:	jle    0x3de <mp_format_float+990>
   0x000003bb <+955>:	fstp   %st(0)
   0x000003bd <+957>:	fildl  -0x3c(%ebp)
   0x000003c0 <+960>:	mov    %eax,-0x40(%ebp)
   0x000003c3 <+963>:	mov    %ecx,-0x34(%ebp)
   0x000003c6 <+966>:	fstpl  0x8(%esp)
   0x000003ca <+970>:	flds   0x48
   0x000003d0 <+976>:	fstpl  (%esp)
   0x000003d3 <+979>:	call   0x3d8 <mp_format_float+984>
   0x000003d8 <+984>:	mov    -0x40(%ebp),%eax
   0x000003db <+987>:	mov    -0x34(%ebp),%ecx
   0x000003de <+990>:	xor    %edx,%edx
   0x000003e0 <+992>:	mov    %edx,-0x34(%ebp)
   0x000003e3 <+995>:	fldl   -0x28(%ebp)
   0x000003e6 <+998>:	fxch   %st(1)
   0x000003e8 <+1000>:	fcomi  %st(1),%st
   0x000003ea <+1002>:	ja     0x400 <mp_format_float+1024>
   0x000003ec <+1004>:	incl   -0x34(%ebp)
   0x000003ef <+1007>:	cmpl   $0x9,-0x34(%ebp)
   0x000003f3 <+1011>:	fsubr  %st,%st(1)
   0x000003f5 <+1013>:	fxch   %st(1)
   0x000003f7 <+1015>:	fstpl  -0x28(%ebp)
   0x000003fa <+1018>:	jne    0x3e3 <mp_format_float+995>
   0x000003fc <+1020>:	fstp   %st(0)
   0x000003fe <+1022>:	jmp    0x404 <mp_format_float+1028>
   0x00000400 <+1024>:	fstp   %st(0)
   0x00000402 <+1026>:	fstp   %st(0)
   0x00000404 <+1028>:	test   %ecx,%ecx
   0x00000406 <+1030>:	je     0x42c <mp_format_float+1068>
   0x00000408 <+1032>:	mov    -0x34(%ebp),%dl
   0x0000040b <+1035>:	lea    0x30(%edx),%ebx
   0x0000040e <+1038>:	mov    -0x30(%ebp),%edx
   0x00000411 <+1041>:	mov    %bl,(%esi)
   0x00000413 <+1043>:	sub    %eax,%edx
   0x00000415 <+1045>:	mov    %edx,%ebx
   0x00000417 <+1047>:	add    -0x3c(%ebp),%ebx
   0x0000041a <+1050>:	jne    0x422 <mp_format_float+1058>
   0x0000041c <+1052>:	cmpl   $0x0,-0x38(%ebp)
   0x00000420 <+1056>:	jg     0x425 <mp_format_float+1061>
   0x00000422 <+1058>:	inc    %esi
   0x00000423 <+1059>:	jmp    0x42c <mp_format_float+1068>
   0x00000425 <+1061>:	movb   $0x2e,0x1(%esi)
   0x00000429 <+1065>:	add    $0x2,%esi
   0x0000042c <+1068>:	dec    %ecx
   0x0000042d <+1069>:	cmpl   $0x0,-0x3c(%ebp)
   0x00000431 <+1073>:	jg     0x43f <mp_format_float+1087>
   0x00000433 <+1075>:	fldl   -0x28(%ebp)
   0x00000436 <+1078>:	fmuls  0x48
   0x0000043c <+1084>:	fstpl  -0x28(%ebp)
   0x0000043f <+1087>:	decl   -0x3c(%ebp)
   0x00000442 <+1090>:	jmp    0x3ab <mp_format_float+939>
   0x00000447 <+1095>:	cmpl   $0x4,-0x34(%ebp)
   0x0000044b <+1099>:	lea    -0x1(%esi),%eax
   0x0000044e <+1102>:	jle    0x4f0 <mp_format_float+1264>
   0x00000454 <+1108>:	mov    (%eax),%dl
   0x00000456 <+1110>:	mov    %eax,%ebx
   0x00000458 <+1112>:	cmp    $0x2e,%dl
   0x0000045b <+1115>:	jne    0x460 <mp_format_float+1120>
   0x0000045d <+1117>:	dec    %eax
   0x0000045e <+1118>:	jmp    0x454 <mp_format_float+1108>
   0x00000460 <+1120>:	lea    -0x30(%edx),%ecx
   0x00000463 <+1123>:	cmp    $0x9,%cl
   0x00000466 <+1126>:	jbe    0x46b <mp_format_float+1131>
   0x00000468 <+1128>:	inc    %ebx
   0x00000469 <+1129>:	jmp    0x47d <mp_format_float+1149>
   0x0000046b <+1131>:	cmp    $0x39,%dl
   0x0000046e <+1134>:	je     0x475 <mp_format_float+1141>
   0x00000470 <+1136>:	inc    %edx
   0x00000471 <+1137>:	mov    %dl,(%eax)
   0x00000473 <+1139>:	jmp    0x47d <mp_format_float+1149>
   0x00000475 <+1141>:	movb   $0x30,(%eax)
   0x00000478 <+1144>:	cmp    0x10(%ebp),%eax
   0x0000047b <+1147>:	jne    0x45d <mp_format_float+1117>
   0x0000047d <+1149>:	cmpb   $0x30,(%ebx)
   0x00000480 <+1152>:	jne    0x4f0 <mp_format_float+1264>
   0x00000482 <+1154>:	cmpb   $0x2e,0x1(%ebx)
   0x00000486 <+1158>:	jne    0x4a8 <mp_format_float+1192>
   0x00000488 <+1160>:	cmpb   $0x66,-0x29(%ebp)
   0x0000048c <+1164>:	je     0x4a8 <mp_format_float+1192>
   0x0000048e <+1166>:	mov    %edi,%eax
   0x00000490 <+1168>:	movw   $0x302e,(%ebx)
   0x00000495 <+1173>:	cmp    $0x2d,%al
   0x00000497 <+1175>:	jne    0x4a3 <mp_format_float+1187>
   0x00000499 <+1177>:	decl   -0x20(%ebp)
   0x0000049c <+1180>:	mov    $0x2b,%al
   0x0000049e <+1182>:	cmove  %eax,%edi
   0x000004a1 <+1185>:	jmp    0x4b6 <mp_format_float+1206>
   0x000004a3 <+1187>:	incl   -0x20(%ebp)
   0x000004a6 <+1190>:	jmp    0x4b6 <mp_format_float+1206>
   0x000004a8 <+1192>:	lea    0x1(%esi),%eax
   0x000004ab <+1195>:	mov    %eax,%edx
   0x000004ad <+1197>:	sub    0x10(%ebp),%edx
   0x000004b0 <+1200>:	cmp    0x14(%ebp),%edx
   0x000004b3 <+1203>:	cmovb  %eax,%esi
   0x000004b6 <+1206>:	mov    %esi,%eax
   0x000004b8 <+1208>:	xor    %edx,%edx
   0x000004ba <+1210>:	sub    %ebx,%eax
   0x000004bc <+1212>:	cmp    %ebx,%esi
   0x000004be <+1214>:	cmovb  %edx,%eax
   0x000004c1 <+1217>:	mov    %eax,0x8(%esp)
   0x000004c5 <+1221>:	mov    %ebx,%eax
   0x000004c7 <+1223>:	sub    %esi,%eax
   0x000004c9 <+1225>:	cmp    %ebx,%esi
   0x000004cb <+1227>:	cmovb  %edx,%eax
   0x000004ce <+1230>:	mov    $0x1,%edx
   0x000004d3 <+1235>:	add    %esi,%eax
   0x000004d5 <+1237>:	mov    %eax,0x4(%esp)
   0x000004d9 <+1241>:	lea    0x1(%ebx),%eax
   0x000004dc <+1244>:	sub    %esi,%eax
   0x000004de <+1246>:	cmp    %ebx,%esi
   0x000004e0 <+1248>:	cmovb  %edx,%eax
   0x000004e3 <+1251>:	add    %esi,%eax
   0x000004e5 <+1253>:	mov    %eax,(%esp)
   0x000004e8 <+1256>:	call   0x4ed <mp_format_float+1261>
   0x000004ed <+1261>:	movb   $0x31,(%ebx)
   0x000004f0 <+1264>:	lea    0x1(%esi),%eax
   0x000004f3 <+1267>:	sub    0x10(%ebp),%eax
   0x000004f6 <+1270>:	cmp    %eax,0x14(%ebp)
   0x000004f9 <+1273>:	jae    0x518 <mp_format_float+1304>
   0x000004fb <+1275>:	movl   $0x193,0x8(%esp)
   0x00000503 <+1283>:	movl   $0x0,0x4(%esp)
   0x0000050b <+1291>:	movl   $0x17,(%esp)
   0x00000512 <+1298>:	call   *0x0
   0x00000518 <+1304>:	cmpl   $0x0,-0x38(%ebp)
   0x0000051c <+1308>:	jle    0x533 <mp_format_float+1331>
   0x0000051e <+1310>:	cmpb   $0x0,-0x2b(%ebp)
   0x00000522 <+1314>:	je     0x533 <mp_format_float+1331>
   0x00000524 <+1316>:	mov    -0x1(%esi),%al
   0x00000527 <+1319>:	cmp    $0x30,%al
   0x00000529 <+1321>:	jne    0x52e <mp_format_float+1326>
   0x0000052b <+1323>:	dec    %esi
   0x0000052c <+1324>:	jmp    0x524 <mp_format_float+1316>
   0x0000052e <+1326>:	cmp    $0x2e,%al
   0x00000530 <+1328>:	jne    0x533 <mp_format_float+1331>
   0x00000532 <+1330>:	dec    %esi
   0x00000533 <+1331>:	mov    %edi,%eax
   0x00000535 <+1333>:	test   %al,%al
   0x00000537 <+1335>:	je     0x581 <mp_format_float+1409>
   0x00000539 <+1337>:	mov    -0x2a(%ebp),%al
   0x0000053c <+1340>:	lea    0x2(%esi),%ecx
   0x0000053f <+1343>:	or     $0x45,%eax
   0x00000542 <+1346>:	cmpl   $0x63,-0x20(%ebp)
   0x00000546 <+1350>:	mov    %al,(%esi)
   0x00000548 <+1352>:	mov    %edi,%eax
   0x0000054a <+1354>:	mov    %al,0x1(%esi)
   0x0000054d <+1357>:	jle    0x563 <mp_format_float+1379>
   0x0000054f <+1359>:	mov    -0x20(%ebp),%eax
   0x00000552 <+1362>:	mov    $0x64,%ebx
   0x00000557 <+1367>:	lea    0x3(%esi),%ecx
   0x0000055a <+1370>:	cltd   
   0x0000055b <+1371>:	idiv   %ebx
   0x0000055d <+1373>:	add    $0x30,%eax
   0x00000560 <+1376>:	mov    %al,0x2(%esi)
   0x00000563 <+1379>:	mov    -0x20(%ebp),%eax
   0x00000566 <+1382>:	mov    $0xa,%ebx
   0x0000056b <+1387>:	lea    0x2(%ecx),%esi
   0x0000056e <+1390>:	cltd   
   0x0000056f <+1391>:	idiv   %ebx
   0x00000571 <+1393>:	mov    %edx,%edi
   0x00000573 <+1395>:	cltd   
   0x00000574 <+1396>:	idiv   %ebx
   0x00000576 <+1398>:	lea    0x30(%edi),%eax
   0x00000579 <+1401>:	mov    %al,0x1(%ecx)
   0x0000057c <+1404>:	add    $0x30,%edx
   0x0000057f <+1407>:	mov    %dl,(%ecx)
   0x00000581 <+1409>:	lea    0x1(%esi),%eax
   0x00000584 <+1412>:	movb   $0x0,(%esi)
   0x00000587 <+1415>:	sub    0x10(%ebp),%eax
   0x0000058a <+1418>:	cmp    %eax,0x14(%ebp)
   0x0000058d <+1421>:	jae    0x5ac <mp_format_float+1452>
   0x0000058f <+1423>:	movl   $0x1ab,0x8(%esp)
   0x00000597 <+1431>:	movl   $0x0,0x4(%esp)
   0x0000059f <+1439>:	movl   $0x17,(%esp)
   0x000005a6 <+1446>:	call   *0x0
   0x000005ac <+1452>:	mov    %esi,%eax
   0x000005ae <+1454>:	sub    0x10(%ebp),%eax
   0x000005b1 <+1457>:	add    $0x4c,%esp
   0x000005b4 <+1460>:	pop    %ebx
   0x000005b5 <+1461>:	pop    %esi
   0x000005b6 <+1462>:	pop    %edi
   0x000005b7 <+1463>:	pop    %ebp
   0x000005b8 <+1464>:	ret    
End of assembler dump.
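After (using `powl()`):
(Reviewer note: in this listing the call arguments are written as 80-bit extended values with `fstpt`, and the value returned by each call is stored and reloaded as a 64-bit double with `fstpl`/`fldl` before it is compared. In the `pow()` listing the returned value is compared directly from the x87 register stack. This explicit rounding to double is presumably what removes the discrepancy.)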
Dump of assembler code for function mp_format_float:
   0x00000000 <+0>:	push   %ebp
   0x00000001 <+1>:	mov    %esp,%ebp
   0x00000003 <+3>:	push   %edi
   0x00000004 <+4>:	push   %esi
   0x00000005 <+5>:	push   %ebx
   0x00000006 <+6>:	sub    $0x6c,%esp
   0x00000009 <+9>:	fldl   0x8(%ebp)
   0x0000000c <+12>:	cmpl   $0x7,0x14(%ebp)
   0x00000010 <+16>:	mov    0x18(%ebp),%edx
   0x00000013 <+19>:	mov    0x1c(%ebp),%ebx
   0x00000016 <+22>:	mov    0x20(%ebp),%ecx
   0x00000019 <+25>:	fstpl  -0x30(%ebp)
   0x0000001c <+28>:	ja     0x4a <mp_format_float+74>
   0x0000001e <+30>:	cmpl   $0x1,0x14(%ebp)
   0x00000022 <+34>:	jbe    0x30 <mp_format_float+48>
   0x00000024 <+36>:	mov    0x10(%ebp),%eax
   0x00000027 <+39>:	movb   $0x3f,(%eax)
   0x0000002a <+42>:	inc    %eax
   0x0000002b <+43>:	mov    %eax,0x10(%ebp)
   0x0000002e <+46>:	jmp    0x36 <mp_format_float+54>
   0x00000030 <+48>:	cmpl   $0x0,0x14(%ebp)
   0x00000034 <+52>:	je     0x3c <mp_format_float+60>
   0x00000036 <+54>:	mov    0x10(%ebp),%eax
   0x00000039 <+57>:	movb   $0x0,(%eax)
   0x0000003c <+60>:	xor    %eax,%eax
   0x0000003e <+62>:	cmpl   $0x1,0x14(%ebp)
   0x00000042 <+66>:	seta   %al
   0x00000045 <+69>:	jmp    0x5cf <mp_format_float+1487>
   0x0000004a <+74>:	fldl   -0x30(%ebp)
   0x0000004d <+77>:	fxam   
   0x0000004f <+79>:	fstsw  %ax
   0x00000052 <+82>:	test   $0x2,%ah
   0x00000055 <+85>:	je     0x76 <mp_format_float+118>
   0x00000057 <+87>:	fxam   
   0x00000059 <+89>:	fstsw  %ax
   0x0000005c <+92>:	and    $0x4500,%ax
   0x00000060 <+96>:	cmp    $0x100,%ax
   0x00000064 <+100>:	je     0x7a <mp_format_float+122>
   0x00000066 <+102>:	mov    0x10(%ebp),%eax
   0x00000069 <+105>:	fchs   
   0x0000006b <+107>:	movb   $0x2d,(%eax)
   0x0000006e <+110>:	lea    0x1(%eax),%esi
   0x00000071 <+113>:	fstpl  -0x30(%ebp)
   0x00000074 <+116>:	jmp    0x8a <mp_format_float+138>
   0x00000076 <+118>:	fstp   %st(0)
   0x00000078 <+120>:	jmp    0x7c <mp_format_float+124>
   0x0000007a <+122>:	fstp   %st(0)
   0x0000007c <+124>:	mov    0x10(%ebp),%esi
   0x0000007f <+127>:	test   %cl,%cl
   0x00000081 <+129>:	je     0x8a <mp_format_float+138>
   0x00000083 <+131>:	mov    %esi,%eax
   0x00000085 <+133>:	lea    0x1(%esi),%esi
   0x00000088 <+136>:	mov    %cl,(%eax)
   0x0000008a <+138>:	mov    %edx,%eax
   0x0000008c <+140>:	fldl   -0x30(%ebp)
   0x0000008f <+143>:	and    $0x20,%eax
   0x00000092 <+146>:	mov    %al,-0x1e(%ebp)
   0x00000095 <+149>:	fxam   
   0x00000097 <+151>:	fstsw  %ax
   0x0000009a <+154>:	fstp   %st(0)
   0x0000009c <+156>:	and    $0x4500,%ax
   0x000000a0 <+160>:	cmp    $0x500,%ax
   0x000000a4 <+164>:	jne    0xbd <mp_format_float+189>
   0x000000a6 <+166>:	mov    -0x1e(%ebp),%bl
   0x000000a9 <+169>:	mov    -0x1e(%ebp),%cl
   0x000000ac <+172>:	lea    0x3(%esi),%eax
   0x000000af <+175>:	mov    -0x1e(%ebp),%dl
   0x000000b2 <+178>:	xor    $0x49,%ebx
   0x000000b5 <+181>:	xor    $0x4e,%ecx
   0x000000b8 <+184>:	xor    $0x46,%edx
   0x000000bb <+187>:	jmp    0xe2 <mp_format_float+226>
   0x000000bd <+189>:	fldl   -0x30(%ebp)
   0x000000c0 <+192>:	fxam   
   0x000000c2 <+194>:	fstsw  %ax
   0x000000c5 <+197>:	fstp   %st(0)
   0x000000c7 <+199>:	and    $0x4500,%ax
   0x000000cb <+203>:	cmp    $0x100,%ax
   0x000000cf <+207>:	jne    0xf2 <mp_format_float+242>
   0x000000d1 <+209>:	mov    -0x1e(%ebp),%dl
   0x000000d4 <+212>:	mov    -0x1e(%ebp),%cl
   0x000000d7 <+215>:	lea    0x3(%esi),%eax
   0x000000da <+218>:	xor    $0x4e,%edx
   0x000000dd <+221>:	xor    $0x41,%ecx
   0x000000e0 <+224>:	mov    %edx,%ebx
   0x000000e2 <+226>:	mov    %bl,(%esi)
   0x000000e4 <+228>:	mov    %cl,0x1(%esi)
   0x000000e7 <+231>:	mov    %dl,0x2(%esi)
   0x000000ea <+234>:	movb   $0x0,(%eax)
   0x000000ed <+237>:	jmp    0x5cc <mp_format_float+1484>
   0x000000f2 <+242>:	mov    %edx,%eax
   0x000000f4 <+244>:	or     $0x20,%eax
   0x000000f7 <+247>:	cmp    $0x67,%al
   0x000000f9 <+249>:	mov    %al,-0x1d(%ebp)
   0x000000fc <+252>:	sete   -0x1f(%ebp)
   0x00000100 <+256>:	test   %ebx,%ebx
   0x00000102 <+258>:	js     0x120 <mp_format_float+288>
   0x00000104 <+260>:	movzbl -0x1f(%ebp),%edi
   0x00000108 <+264>:	sete   %al
   0x0000010b <+267>:	and    -0x1f(%ebp),%al
   0x0000010e <+270>:	cmovne %eax,%edi
   0x00000111 <+273>:	mov    %edi,%eax
   0x00000113 <+275>:	mov    %al,-0x1f(%ebp)
   0x00000116 <+278>:	mov    $0x1,%eax
   0x0000011b <+283>:	cmovne %eax,%ebx
   0x0000011e <+286>:	jmp    0x125 <mp_format_float+293>
   0x00000120 <+288>:	mov    $0x6,%ebx
   0x00000125 <+293>:	fldz   
   0x00000127 <+295>:	fldl   -0x30(%ebp)
   0x0000012a <+298>:	mov    0x14(%ebp),%edi
   0x0000012d <+301>:	mov    %esi,%eax
   0x0000012f <+303>:	sub    0x10(%ebp),%eax
   0x00000132 <+306>:	sub    %eax,%edi
   0x00000134 <+308>:	fucomip %st(1),%st
   0x00000136 <+310>:	fstp   %st(0)
   0x00000138 <+312>:	mov    %edi,-0x34(%ebp)
   0x0000013b <+315>:	lea    -0x1(%edi),%edi
   0x0000013e <+318>:	jp     0x187 <mp_format_float+391>
   0x00000140 <+320>:	jne    0x187 <mp_format_float+391>
   0x00000142 <+322>:	cmpb   $0x66,-0x1d(%ebp)
   0x00000146 <+326>:	jne    0x15e <mp_format_float+350>
   0x00000148 <+328>:	lea    0x1(%ebx),%eax
   0x0000014b <+331>:	cmp    %edi,%eax
   0x0000014d <+333>:	jl     0x155 <mp_format_float+341>
   0x0000014f <+335>:	mov    -0x34(%ebp),%ebx
   0x00000152 <+338>:	sub    $0x3,%ebx
   0x00000155 <+341>:	lea    0x1(%ebx),%ecx
   0x00000158 <+344>:	xor    %eax,%eax
   0x0000015a <+346>:	xor    %edi,%edi
   0x0000015c <+348>:	jmp    0x17a <mp_format_float+378>
   0x0000015e <+350>:	lea    0x5(%ebx),%eax
   0x00000161 <+353>:	cmp    %edi,%eax
   0x00000163 <+355>:	jl     0x16b <mp_format_float+363>
   0x00000165 <+357>:	mov    -0x34(%ebp),%ebx
   0x00000168 <+360>:	sub    $0x7,%ebx
   0x0000016b <+363>:	xor    %edi,%edi
   0x0000016d <+365>:	xor    %eax,%eax
   0x0000016f <+367>:	xor    %ecx,%ecx
   0x00000171 <+369>:	mov    $0x2b,%dl
   0x00000173 <+371>:	cmpb   $0x65,-0x1d(%ebp)
   0x00000177 <+375>:	cmove  %edx,%edi
   0x0000017a <+378>:	xor    %edx,%edx
   0x0000017c <+380>:	mov    %edx,-0x34(%ebp)
   0x0000017f <+383>:	mov    %edx,-0x28(%ebp)
   0x00000182 <+386>:	jmp    0x392 <mp_format_float+914>
   0x00000187 <+391>:	fnstcw -0x1a(%ebp)
   0x0000018a <+394>:	mov    -0x2c(%ebp),%edx
   0x0000018d <+397>:	fldl   0x40
   0x00000193 <+403>:	mov    %edx,%eax
   0x00000195 <+405>:	shr    $0x14,%eax
   0x00000198 <+408>:	and    $0x7ff,%eax
   0x0000019d <+413>:	sub    $0x3ff,%eax
   0x000001a2 <+418>:	mov    %eax,-0x28(%ebp)
   0x000001a5 <+421>:	mov    -0x1a(%ebp),%ax
   0x000001a9 <+425>:	fimull -0x28(%ebp)
   0x000001ac <+428>:	or     $0xc,%ah
   0x000001af <+431>:	mov    %ax,-0x1c(%ebp)
   0x000001b3 <+435>:	fldcw  -0x1c(%ebp)
   0x000001b6 <+438>:	fistpl -0x28(%ebp)
   0x000001b9 <+441>:	fldcw  -0x1a(%ebp)
   0x000001bc <+444>:	mov    -0x28(%ebp),%eax
   0x000001bf <+447>:	fldl   -0x30(%ebp)
   0x000001c2 <+450>:	fld1   
   0x000001c4 <+452>:	fcomip %st(1),%st
   0x000001c6 <+454>:	fstp   %st(0)
   0x000001c8 <+456>:	jbe    0x295 <mp_format_float+661>
   0x000001ce <+462>:	fildl  -0x28(%ebp)
   0x000001d1 <+465>:	neg    %eax
   0x000001d3 <+467>:	mov    %eax,-0x40(%ebp)
   0x000001d6 <+470>:	fstpt  0xc(%esp)
   0x000001da <+474>:	flds   0x48
   0x000001e0 <+480>:	fstpt  (%esp)
   0x000001e3 <+483>:	call   0x1e8 <mp_format_float+488>
   0x000001e8 <+488>:	mov    -0x40(%ebp),%eax
   0x000001eb <+491>:	fstpl  -0x28(%ebp)
   0x000001ee <+494>:	fldl   -0x28(%ebp)
   0x000001f1 <+497>:	mov    %eax,-0x28(%ebp)
   0x000001f4 <+500>:	fldl   -0x30(%ebp)
   0x000001f7 <+503>:	fxch   %st(1)
   0x000001f9 <+505>:	fcomi  %st(1),%st
   0x000001fb <+507>:	fstp   %st(1)
   0x000001fd <+509>:	jbe    0x229 <mp_format_float+553>
   0x000001ff <+511>:	fstp   %st(0)
   0x00000201 <+513>:	incl   -0x28(%ebp)
   0x00000204 <+516>:	mov    -0x28(%ebp),%eax
   0x00000207 <+519>:	neg    %eax
   0x00000209 <+521>:	mov    %eax,-0x40(%ebp)
   0x0000020c <+524>:	fildl  -0x40(%ebp)
   0x0000020f <+527>:	fstpt  0xc(%esp)
   0x00000213 <+531>:	flds   0x48
   0x00000219 <+537>:	fstpt  (%esp)
   0x0000021c <+540>:	call   0x221 <mp_format_float+545>
   0x00000221 <+545>:	fstpl  -0x40(%ebp)
   0x00000224 <+548>:	fldl   -0x40(%ebp)
   0x00000227 <+551>:	jmp    0x1f4 <mp_format_float+500>
   0x00000229 <+553>:	cmpb   $0x66,-0x1d(%ebp)
   0x0000022d <+557>:	je     0x246 <mp_format_float+582>
   0x0000022f <+559>:	cmpl   $0x4,-0x28(%ebp)
   0x00000233 <+563>:	jg     0x266 <mp_format_float+614>
   0x00000235 <+565>:	cmpb   $0x0,-0x1f(%ebp)
   0x00000239 <+569>:	je     0x266 <mp_format_float+614>
   0x0000023b <+571>:	fstp   %st(0)
   0x0000023d <+573>:	mov    -0x28(%ebp),%eax
   0x00000240 <+576>:	lea    -0x1(%eax,%ebx,1),%ebx
   0x00000244 <+580>:	jmp    0x248 <mp_format_float+584>
   0x00000246 <+582>:	fstp   %st(0)
   0x00000248 <+584>:	lea    0x1(%ebx),%eax
   0x0000024b <+587>:	cmp    %edi,%eax
   0x0000024d <+589>:	jl     0x255 <mp_format_float+597>
   0x0000024f <+591>:	mov    -0x34(%ebp),%ebx
   0x00000252 <+594>:	sub    $0x3,%ebx
   0x00000255 <+597>:	xor    %edx,%edx
   0x00000257 <+599>:	lea    0x1(%ebx),%ecx
   0x0000025a <+602>:	xor    %eax,%eax
   0x0000025c <+604>:	xor    %edi,%edi
   0x0000025e <+606>:	mov    %edx,-0x34(%ebp)
   0x00000261 <+609>:	jmp    0x368 <mp_format_float+872>
   0x00000266 <+614>:	mov    -0x34(%ebp),%eax
   0x00000269 <+617>:	sub    $0x7,%eax
   0x0000026c <+620>:	cmp    %ebx,%eax
   0x0000026e <+622>:	jg     0x27e <mp_format_float+638>
   0x00000270 <+624>:	cmpb   $0x67,-0x1d(%ebp)
   0x00000274 <+628>:	mov    %eax,%ebx
   0x00000276 <+630>:	je     0x27e <mp_format_float+638>
   0x00000278 <+632>:	mov    -0x34(%ebp),%ebx
   0x0000027b <+635>:	sub    $0x8,%ebx
   0x0000027e <+638>:	fdivrl -0x30(%ebp)
   0x00000281 <+641>:	mov    -0x28(%ebp),%eax
   0x00000284 <+644>:	xor    %ecx,%ecx
   0x00000286 <+646>:	mov    $0x2d,%edi
   0x0000028b <+651>:	neg    %eax
   0x0000028d <+653>:	fstpl  -0x30(%ebp)
   0x00000290 <+656>:	jmp    0x38d <mp_format_float+909>
   0x00000295 <+661>:	inc    %eax
   0x00000296 <+662>:	mov    %eax,-0x40(%ebp)
   0x00000299 <+665>:	fildl  -0x40(%ebp)
   0x0000029c <+668>:	fstpt  0xc(%esp)
   0x000002a0 <+672>:	flds   0x48
   0x000002a6 <+678>:	fstpt  (%esp)
   0x000002a9 <+681>:	call   0x2ae <mp_format_float+686>
   0x000002ae <+686>:	fstpl  -0x40(%ebp)
   0x000002b1 <+689>:	fldl   -0x40(%ebp)
   0x000002b4 <+692>:	fldl   -0x30(%ebp)
   0x000002b7 <+695>:	fcomip %st(1),%st
   0x000002b9 <+697>:	fstp   %st(0)
   0x000002bb <+699>:	jb     0x2f0 <mp_format_float+752>
   0x000002bd <+701>:	mov    -0x28(%ebp),%eax
   0x000002c0 <+704>:	inc    %eax
   0x000002c1 <+705>:	mov    %eax,-0x40(%ebp)
   0x000002c4 <+708>:	mov    -0x28(%ebp),%eax
   0x000002c7 <+711>:	add    $0x2,%eax
   0x000002ca <+714>:	mov    %eax,-0x28(%ebp)
   0x000002cd <+717>:	fildl  -0x28(%ebp)
   0x000002d0 <+720>:	fstpt  0xc(%esp)
   0x000002d4 <+724>:	flds   0x48
   0x000002da <+730>:	fstpt  (%esp)
   0x000002dd <+733>:	call   0x2e2 <mp_format_float+738>
   0x000002e2 <+738>:	mov    -0x40(%ebp),%eax
   0x000002e5 <+741>:	fstpl  -0x28(%ebp)
   0x000002e8 <+744>:	fldl   -0x28(%ebp)
   0x000002eb <+747>:	mov    %eax,-0x28(%ebp)
   0x000002ee <+750>:	jmp    0x2b4 <mp_format_float+692>
   0x000002f0 <+752>:	cmpb   $0x66,-0x1d(%ebp)
   0x000002f4 <+756>:	jne    0x31b <mp_format_float+795>
   0x000002f6 <+758>:	cmp    %edi,-0x28(%ebp)
   0x000002f9 <+761>:	jge    0x329 <mp_format_float+809>
   0x000002fb <+763>:	mov    -0x28(%ebp),%eax
   0x000002fe <+766>:	lea    0x1(%eax,%ebx,1),%eax
   0x00000302 <+770>:	cmp    %edi,%eax
   0x00000304 <+772>:	jl     0x35c <mp_format_float+860>
   0x00000306 <+774>:	mov    %edi,%eax
   0x00000308 <+776>:	mov    -0x28(%ebp),%edi
   0x0000030b <+779>:	sub    %edi,%eax
   0x0000030d <+781>:	lea    -0x2(%eax),%ebx
   0x00000310 <+784>:	dec    %eax
   0x00000311 <+785>:	mov    $0x0,%eax
   0x00000316 <+790>:	cmove  %eax,%ebx
   0x00000319 <+793>:	jmp    0x35c <mp_format_float+860>
   0x0000031b <+795>:	cmpb   $0x65,-0x1d(%ebp)
   0x0000031f <+799>:	je     0x329 <mp_format_float+809>
   0x00000321 <+801>:	cmpb   $0x67,-0x1d(%ebp)
   0x00000325 <+805>:	je     0x33b <mp_format_float+827>
   0x00000327 <+807>:	jmp    0x383 <mp_format_float+899>
   0x00000329 <+809>:	mov    -0x34(%ebp),%eax
   0x0000032c <+812>:	sub    $0x7,%eax
   0x0000032f <+815>:	cmp    %ebx,%eax
   0x00000331 <+817>:	jg     0x36e <mp_format_float+878>
   0x00000333 <+819>:	mov    -0x34(%ebp),%ebx
   0x00000336 <+822>:	sub    $0x8,%ebx
   0x00000339 <+825>:	jmp    0x36e <mp_format_float+878>
   0x0000033b <+827>:	lea    0x5(%ebx),%eax
   0x0000033e <+830>:	cmp    %edi,%eax
   0x00000340 <+832>:	jl     0x34f <mp_format_float+847>
   0x00000342 <+834>:	mov    -0x34(%ebp),%ebx
   0x00000345 <+837>:	sub    $0x7,%ebx
   0x00000348 <+840>:	cmp    %ebx,-0x28(%ebp)
   0x0000034b <+843>:	jl     0x356 <mp_format_float+854>
   0x0000034d <+845>:	jmp    0x383 <mp_format_float+899>
   0x0000034f <+847>:	mov    -0x28(%ebp),%eax
   0x00000352 <+850>:	cmp    %eax,%ebx
   0x00000354 <+852>:	jle    0x383 <mp_format_float+899>
   0x00000356 <+854>:	mov    -0x28(%ebp),%eax
   0x00000359 <+857>:	inc    %eax
   0x0000035a <+858>:	sub    %eax,%ebx
   0x0000035c <+860>:	mov    -0x28(%ebp),%eax
   0x0000035f <+863>:	xor    %edi,%edi
   0x00000361 <+865>:	mov    %eax,-0x34(%ebp)
   0x00000364 <+868>:	lea    0x1(%eax,%ebx,1),%ecx
   0x00000368 <+872>:	movb   $0x66,-0x1d(%ebp)
   0x0000036c <+876>:	jmp    0x392 <mp_format_float+914>
   0x0000036e <+878>:	xor    %edx,%edx
   0x00000370 <+880>:	movb   $0x65,-0x1d(%ebp)
   0x00000374 <+884>:	mov    -0x28(%ebp),%eax
   0x00000377 <+887>:	xor    %ecx,%ecx
   0x00000379 <+889>:	mov    %edx,-0x34(%ebp)
   0x0000037c <+892>:	mov    $0x2b,%edi
   0x00000381 <+897>:	jmp    0x392 <mp_format_float+914>
   0x00000383 <+899>:	mov    -0x28(%ebp),%eax
   0x00000386 <+902>:	xor    %ecx,%ecx
   0x00000388 <+904>:	mov    $0x2b,%edi
   0x0000038d <+909>:	xor    %edx,%edx
   0x0000038f <+911>:	mov    %edx,-0x34(%ebp)
   0x00000392 <+914>:	xor    %edx,%edx
   0x00000394 <+916>:	test   %ebx,%ebx
   0x00000396 <+918>:	cmovns %ebx,%edx
   0x00000399 <+921>:	cmpb   $0x65,-0x1d(%ebp)
   0x0000039d <+925>:	mov    %edx,-0x38(%ebp)
   0x000003a0 <+928>:	jne    0x3a7 <mp_format_float+935>
   0x000003a2 <+930>:	mov    %edx,%ecx
   0x000003a4 <+932>:	inc    %ecx
   0x000003a5 <+933>:	jmp    0x3bb <mp_format_float+955>
   0x000003a7 <+935>:	cmpb   $0x67,-0x1d(%ebp)
   0x000003ab <+939>:	jne    0x3bb <mp_format_float+955>
   0x000003ad <+941>:	test   %ebx,%ebx
   0x000003af <+943>:	mov    $0x1,%ecx
   0x000003b4 <+948>:	cmovg  -0x38(%ebp),%ecx
   0x000003b8 <+952>:	mov    %ecx,-0x38(%ebp)
   0x000003bb <+955>:	xor    %ebx,%ebx
   0x000003bd <+957>:	mov    %eax,-0x44(%ebp)
   0x000003c0 <+960>:	mov    %ebx,-0x40(%ebp)
   0x000003c3 <+963>:	test   %ecx,%ecx
   0x000003c5 <+965>:	js     0x465 <mp_format_float+1125>
   0x000003cb <+971>:	cmpl   $0x0,-0x44(%ebp)
   0x000003cf <+975>:	fld1   
   0x000003d1 <+977>:	jle    0x3fc <mp_format_float+1020>
   0x000003d3 <+979>:	fstp   %st(0)
   0x000003d5 <+981>:	fildl  -0x44(%ebp)
   0x000003d8 <+984>:	mov    %eax,-0x4c(%ebp)
   0x000003db <+987>:	mov    %ecx,-0x48(%ebp)
   0x000003de <+990>:	fstpt  0xc(%esp)
   0x000003e2 <+994>:	flds   0x48
   0x000003e8 <+1000>:	fstpt  (%esp)
   0x000003eb <+1003>:	call   0x3f0 <mp_format_float+1008>
   0x000003f0 <+1008>:	mov    -0x4c(%ebp),%eax
   0x000003f3 <+1011>:	mov    -0x48(%ebp),%ecx
   0x000003f6 <+1014>:	fstpl  -0x40(%ebp)
   0x000003f9 <+1017>:	fldl   -0x40(%ebp)
   0x000003fc <+1020>:	xor    %edx,%edx
   0x000003fe <+1022>:	mov    %edx,-0x40(%ebp)
   0x00000401 <+1025>:	fldl   -0x30(%ebp)
   0x00000404 <+1028>:	fxch   %st(1)
   0x00000406 <+1030>:	fcomi  %st(1),%st
   0x00000408 <+1032>:	ja     0x41e <mp_format_float+1054>
   0x0000040a <+1034>:	incl   -0x40(%ebp)
   0x0000040d <+1037>:	cmpl   $0x9,-0x40(%ebp)
   0x00000411 <+1041>:	fsubr  %st,%st(1)
   0x00000413 <+1043>:	fxch   %st(1)
   0x00000415 <+1045>:	fstpl  -0x30(%ebp)
   0x00000418 <+1048>:	jne    0x401 <mp_format_float+1025>
   0x0000041a <+1050>:	fstp   %st(0)
   0x0000041c <+1052>:	jmp    0x422 <mp_format_float+1058>
   0x0000041e <+1054>:	fstp   %st(0)
   0x00000420 <+1056>:	fstp   %st(0)
   0x00000422 <+1058>:	test   %ecx,%ecx
   0x00000424 <+1060>:	je     0x44a <mp_format_float+1098>
   0x00000426 <+1062>:	mov    -0x40(%ebp),%dl
   0x00000429 <+1065>:	lea    0x30(%edx),%ebx
   0x0000042c <+1068>:	mov    -0x34(%ebp),%edx
   0x0000042f <+1071>:	mov    %bl,(%esi)
   0x00000431 <+1073>:	sub    %eax,%edx
   0x00000433 <+1075>:	mov    %edx,%ebx
   0x00000435 <+1077>:	add    -0x44(%ebp),%ebx
   0x00000438 <+1080>:	jne    0x440 <mp_format_float+1088>
   0x0000043a <+1082>:	cmpl   $0x0,-0x38(%ebp)
   0x0000043e <+1086>:	jg     0x443 <mp_format_float+1091>
   0x00000440 <+1088>:	inc    %esi
   0x00000441 <+1089>:	jmp    0x44a <mp_format_float+1098>
   0x00000443 <+1091>:	movb   $0x2e,0x1(%esi)
   0x00000447 <+1095>:	add    $0x2,%esi
   0x0000044a <+1098>:	dec    %ecx
   0x0000044b <+1099>:	cmpl   $0x0,-0x44(%ebp)
   0x0000044f <+1103>:	jg     0x45d <mp_format_float+1117>
   0x00000451 <+1105>:	fldl   -0x30(%ebp)
   0x00000454 <+1108>:	fmuls  0x48
   0x0000045a <+1114>:	fstpl  -0x30(%ebp)
   0x0000045d <+1117>:	decl   -0x44(%ebp)
   0x00000460 <+1120>:	jmp    0x3c3 <mp_format_float+963>
   0x00000465 <+1125>:	cmpl   $0x4,-0x40(%ebp)
   0x00000469 <+1129>:	lea    -0x1(%esi),%eax
   0x0000046c <+1132>:	jle    0x50e <mp_format_float+1294>
   0x00000472 <+1138>:	mov    (%eax),%dl
   0x00000474 <+1140>:	mov    %eax,%ebx
   0x00000476 <+1142>:	cmp    $0x2e,%dl
   0x00000479 <+1145>:	jne    0x47e <mp_format_float+1150>
   0x0000047b <+1147>:	dec    %eax
   0x0000047c <+1148>:	jmp    0x472 <mp_format_float+1138>
   0x0000047e <+1150>:	lea    -0x30(%edx),%ecx
   0x00000481 <+1153>:	cmp    $0x9,%cl
   0x00000484 <+1156>:	jbe    0x489 <mp_format_float+1161>
   0x00000486 <+1158>:	inc    %ebx
   0x00000487 <+1159>:	jmp    0x49b <mp_format_float+1179>
   0x00000489 <+1161>:	cmp    $0x39,%dl
   0x0000048c <+1164>:	je     0x493 <mp_format_float+1171>
   0x0000048e <+1166>:	inc    %edx
   0x0000048f <+1167>:	mov    %dl,(%eax)
   0x00000491 <+1169>:	jmp    0x49b <mp_format_float+1179>
   0x00000493 <+1171>:	movb   $0x30,(%eax)
   0x00000496 <+1174>:	cmp    0x10(%ebp),%eax
   0x00000499 <+1177>:	jne    0x47b <mp_format_float+1147>
   0x0000049b <+1179>:	cmpb   $0x30,(%ebx)
   0x0000049e <+1182>:	jne    0x50e <mp_format_float+1294>
   0x000004a0 <+1184>:	cmpb   $0x2e,0x1(%ebx)
   0x000004a4 <+1188>:	jne    0x4c6 <mp_format_float+1222>
   0x000004a6 <+1190>:	cmpb   $0x66,-0x1d(%ebp)
   0x000004aa <+1194>:	je     0x4c6 <mp_format_float+1222>
   0x000004ac <+1196>:	mov    %edi,%eax
   0x000004ae <+1198>:	movw   $0x302e,(%ebx)
   0x000004b3 <+1203>:	cmp    $0x2d,%al
   0x000004b5 <+1205>:	jne    0x4c1 <mp_format_float+1217>
   0x000004b7 <+1207>:	decl   -0x28(%ebp)
   0x000004ba <+1210>:	mov    $0x2b,%al
   0x000004bc <+1212>:	cmove  %eax,%edi
   0x000004bf <+1215>:	jmp    0x4d4 <mp_format_float+1236>
   0x000004c1 <+1217>:	incl   -0x28(%ebp)
   0x000004c4 <+1220>:	jmp    0x4d4 <mp_format_float+1236>
   0x000004c6 <+1222>:	lea    0x1(%esi),%eax
   0x000004c9 <+1225>:	mov    %eax,%edx
   0x000004cb <+1227>:	sub    0x10(%ebp),%edx
   0x000004ce <+1230>:	cmp    0x14(%ebp),%edx
   0x000004d1 <+1233>:	cmovb  %eax,%esi
   0x000004d4 <+1236>:	mov    %esi,%eax
   0x000004d6 <+1238>:	xor    %edx,%edx
   0x000004d8 <+1240>:	sub    %ebx,%eax
   0x000004da <+1242>:	cmp    %ebx,%esi
   0x000004dc <+1244>:	cmovb  %edx,%eax
   0x000004df <+1247>:	mov    %eax,0x8(%esp)
   0x000004e3 <+1251>:	mov    %ebx,%eax
   0x000004e5 <+1253>:	sub    %esi,%eax
   0x000004e7 <+1255>:	cmp    %ebx,%esi
   0x000004e9 <+1257>:	cmovb  %edx,%eax
   0x000004ec <+1260>:	mov    $0x1,%edx
   0x000004f1 <+1265>:	add    %esi,%eax
   0x000004f3 <+1267>:	mov    %eax,0x4(%esp)
   0x000004f7 <+1271>:	lea    0x1(%ebx),%eax
   0x000004fa <+1274>:	sub    %esi,%eax
   0x000004fc <+1276>:	cmp    %ebx,%esi
   0x000004fe <+1278>:	cmovb  %edx,%eax
   0x00000501 <+1281>:	add    %esi,%eax
   0x00000503 <+1283>:	mov    %eax,(%esp)
   0x00000506 <+1286>:	call   0x50b <mp_format_float+1291>
   0x0000050b <+1291>:	movb   $0x31,(%ebx)
   0x0000050e <+1294>:	lea    0x1(%esi),%eax
   0x00000511 <+1297>:	sub    0x10(%ebp),%eax
   0x00000514 <+1300>:	cmp    %eax,0x14(%ebp)
   0x00000517 <+1303>:	jae    0x536 <mp_format_float+1334>
   0x00000519 <+1305>:	movl   $0x193,0x8(%esp)
   0x00000521 <+1313>:	movl   $0x0,0x4(%esp)
   0x00000529 <+1321>:	movl   $0x17,(%esp)
   0x00000530 <+1328>:	call   *0x0
   0x00000536 <+1334>:	cmpl   $0x0,-0x38(%ebp)
   0x0000053a <+1338>:	jle    0x551 <mp_format_float+1361>
   0x0000053c <+1340>:	cmpb   $0x0,-0x1f(%ebp)
   0x00000540 <+1344>:	je     0x551 <mp_format_float+1361>
   0x00000542 <+1346>:	mov    -0x1(%esi),%al
   0x00000545 <+1349>:	cmp    $0x30,%al
   0x00000547 <+1351>:	jne    0x54c <mp_format_float+1356>
   0x00000549 <+1353>:	dec    %esi
   0x0000054a <+1354>:	jmp    0x542 <mp_format_float+1346>
   0x0000054c <+1356>:	cmp    $0x2e,%al
   0x0000054e <+1358>:	jne    0x551 <mp_format_float+1361>
   0x00000550 <+1360>:	dec    %esi
   0x00000551 <+1361>:	mov    %edi,%eax
   0x00000553 <+1363>:	test   %al,%al
   0x00000555 <+1365>:	je     0x59f <mp_format_float+1439>
   0x00000557 <+1367>:	mov    -0x1e(%ebp),%al
   0x0000055a <+1370>:	lea    0x2(%esi),%ecx
   0x0000055d <+1373>:	or     $0x45,%eax
   0x00000560 <+1376>:	cmpl   $0x63,-0x28(%ebp)
   0x00000564 <+1380>:	mov    %al,(%esi)
   0x00000566 <+1382>:	mov    %edi,%eax
   0x00000568 <+1384>:	mov    %al,0x1(%esi)
   0x0000056b <+1387>:	jle    0x581 <mp_format_float+1409>
   0x0000056d <+1389>:	mov    -0x28(%ebp),%eax
   0x00000570 <+1392>:	mov    $0x64,%ebx
   0x00000575 <+1397>:	lea    0x3(%esi),%ecx
   0x00000578 <+1400>:	cltd   
   0x00000579 <+1401>:	idiv   %ebx
   0x0000057b <+1403>:	add    $0x30,%eax
   0x0000057e <+1406>:	mov    %al,0x2(%esi)
   0x00000581 <+1409>:	mov    -0x28(%ebp),%eax
   0x00000584 <+1412>:	mov    $0xa,%ebx
   0x00000589 <+1417>:	lea    0x2(%ecx),%esi
   0x0000058c <+1420>:	cltd   
   0x0000058d <+1421>:	idiv   %ebx
   0x0000058f <+1423>:	mov    %edx,%edi
   0x00000591 <+1425>:	cltd   
   0x00000592 <+1426>:	idiv   %ebx
   0x00000594 <+1428>:	lea    0x30(%edi),%eax
   0x00000597 <+1431>:	mov    %al,0x1(%ecx)
   0x0000059a <+1434>:	add    $0x30,%edx
   0x0000059d <+1437>:	mov    %dl,(%ecx)
   0x0000059f <+1439>:	lea    0x1(%esi),%eax
   0x000005a2 <+1442>:	movb   $0x0,(%esi)
   0x000005a5 <+1445>:	sub    0x10(%ebp),%eax
   0x000005a8 <+1448>:	cmp    %eax,0x14(%ebp)
   0x000005ab <+1451>:	jae    0x5ca <mp_format_float+1482>
   0x000005ad <+1453>:	movl   $0x1ab,0x8(%esp)
   0x000005b5 <+1461>:	movl   $0x0,0x4(%esp)
   0x000005bd <+1469>:	movl   $0x17,(%esp)
   0x000005c4 <+1476>:	call   *0x0
   0x000005ca <+1482>:	mov    %esi,%eax
   0x000005cc <+1484>:	sub    0x10(%ebp),%eax
   0x000005cf <+1487>:	add    $0x6c,%esp
   0x000005d2 <+1490>:	pop    %ebx
   0x000005d3 <+1491>:	pop    %esi
   0x000005d4 <+1492>:	pop    %edi
   0x000005d5 <+1493>:	pop    %ebp
   0x000005d6 <+1494>:	ret    
End of assembler dump.

They look quite similar - neither calls any actual functions, so gcc must be optimising the pow() calls as inline assembly. So I'm guessing/hoping this doesn't make it any less efficient.

@github-actions
Copy link

Code size report:

   bare-arm:    +0 +0.000% 
minimal x86:    +0 +0.000% 

@dlech dlech force-pushed the mingw32-float-format branch from 5232f70 to fcb3d6d Compare December 19, 2022 00:42
@github-actions
Copy link

Code size report:

   bare-arm:    +0 +0.000% 
minimal x86:    +0 +0.000% 

@codecov-commenter
Copy link

codecov-commenter commented Dec 19, 2022

Codecov Report

All modified and coverable lines are covered by tests ✅

Comparison is base (ac8e7f7) 98.36% compared to head (fcb3d6d) 98.49%.

❗ Current head fcb3d6d differs from pull request most recent head 23342ef. Consider uploading reports for the commit 23342ef to get more accurate results

Additional details and impacted files
@@            Coverage Diff             @@
##           master   #10267      +/-   ##
==========================================
+ Coverage   98.36%   98.49%   +0.13%     
==========================================
  Files         159      155       -4     
  Lines       21093    20528     -565     
==========================================
- Hits        20748    20220     -528     
+ Misses        345      308      -37     

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

@dpgeorge
Copy link
Member

I'm not sure this is the right solution. What's probably happening is that it's using 80-bit float precision internally and that's making a slight difference to rounding in the uPy float algorithm. Or the compiler (with optimisation) ignores the IEEE standard; see -funsafe-math-optimizations and -ffast-math.

Maybe there's a compiler option that can be used to fix this? Maybe we can compile formatfloat.c with -O0 unconditionally on mingw32?

Otherwise we can simply skip this test on mingw32 (see RUN_TESTS_SKIP in ports/windows/Makefile).

@dlech
Copy link
Contributor Author

dlech commented Dec 19, 2022

Maybe there's a compiler option that can be used to fix this? Maybe we can compile formatfloat.c with -O0 unconditionally on mingw32?

We can actually just not optimize the one function, and I have confirmed that it fixes the problem too. But since the problem is just with the pow() calls, it seemed like a bit of overkill for such a large function compared to using powl(), which just changes a few CPU instructions.

diff --git a/py/formatfloat.c b/py/formatfloat.c
index fc1b2fe7f..1daee1360 100644
--- a/py/formatfloat.c
+++ b/py/formatfloat.c
@@ -98,6 +98,11 @@ static inline int fp_expval(FPTYPE x) {
     return (int)((fb.i >> MP_FLOAT_FRAC_BITS) & (~(0xFFFFFFFF << MP_FLOAT_EXP_BITS))) - MP_FLOAT_EXP_OFFSET;
 }
 
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE && defined(__GNUC__) && defined(__MINGW32__) && defined(__i386__)
+// When optimizations are enabled using mingw's gcc, it breaks pow(double, double)
+// and gives wrong results.
+__attribute__((optimize("O0")))
+#endif
 int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, char sign) {
 
     char *s = buf;

@dlech
Copy link
Contributor Author

dlech commented Dec 19, 2022

I suppose another option could be to create a wrapper around pow() and compile that function "O0" so that it actually calls pow() instead of inline assembly. EDIT: nope, doesn't seem to work.

@dlech
Copy link
Contributor Author

dlech commented Dec 19, 2022

Maybe there's a compiler option that can be used to fix this?

I think I found the answer: https://lemire.me/blog/2020/06/26/gcc-not-nearest/

@dlech dlech force-pushed the mingw32-float-format branch from fcb3d6d to 847bc69 Compare December 19, 2022 04:10
@dlech dlech force-pushed the mingw32-float-format branch 3 times, most recently from 38e455a to 7d96388 Compare December 19, 2022 04:59
@@ -84,6 +84,11 @@ ifneq ($(FROZEN_MANIFEST),)
CFLAGS += -DMICROPY_QSTR_EXTRA_POOL=mp_qstr_frozen_const_pool -DMICROPY_MODULE_FROZEN_MPY=1 -DMPZ_DIG_SIZE=16
endif

ifeq ($(shell $(CC) -dumpmachine),i686-w64-mingw32)
# https://lemire.me/blog/2020/06/26/gcc-not-nearest
CFLAGS += -msse -mfpmath=sse -march=pentium4
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs a more substantial comment, eg "force gcc to use IEEE correct rounding when optimising float constants at compile time"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

@dlech dlech force-pushed the mingw32-float-format branch 2 times, most recently from 611b631 to e58d368 Compare December 19, 2022 05:31
@stinos
Copy link
Contributor

stinos commented Dec 19, 2022

FYI for the msvc build we also use -fp:precise (the default) exactly for this reason.

When compiler optimizations are enabled on the mingw version of gcc, we are
getting failing tests because of rounding issues, for example:

    print(float("1e24"))

would print

    9.999999999999999e+23

instead of

    1e+24

It turns out special compiler options are needed to get GCC to use the SSE
instruction set instead of the 387 coprocessor (which uses 80-bit precision
internally).

Signed-off-by: David Lechner <david@pybricks.com>
@dpgeorge dpgeorge force-pushed the mingw32-float-format branch from e58d368 to 23342ef Compare February 5, 2024 03:06
@dpgeorge dpgeorge merged commit 23342ef into micropython:master Feb 5, 2024
@dlech dlech deleted the mingw32-float-format branch February 5, 2024 03:35
tannewt added a commit to tannewt/circuitpython that referenced this pull request May 3, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
py-core Relates to py/ directory in source
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants