Skip to content

Conversation

dlech
Copy link
Contributor

@dlech dlech commented Dec 19, 2022

When compiler optimizations are enabled with the MinGW build of GCC targeting 32-bit Windows, some tests fail because of floating-point rounding issues. For example:

print(float("1e24"))

would print

9.999999999999999e+23

instead of

1e+24

We can work around the issue by using powl() instead of pow() in mp_format_float() on affected targets.

@dlech
Copy link
Contributor Author

dlech commented Dec 19, 2022

@dpgeorge dpgeorge added the py-core Relates to py/ directory in source label Dec 19, 2022
@dlech
Copy link
Contributor Author

dlech commented Dec 19, 2022

I don't know x86 assembly, but here is the difference this change makes.

Before (using `pow()`):
Dump of assembler code for function mp_format_float:
   0x00000000 <+0>:	push   %ebp
   0x00000001 <+1>:	mov    %esp,%ebp
   0x00000003 <+3>:	push   %edi
   0x00000004 <+4>:	push   %esi
   0x00000005 <+5>:	push   %ebx
   0x00000006 <+6>:	sub    $0x4c,%esp
   0x00000009 <+9>:	fldl   0x8(%ebp)
   0x0000000c <+12>:	cmpl   $0x7,0x14(%ebp)
   0x00000010 <+16>:	mov    0x18(%ebp),%edx
   0x00000013 <+19>:	mov    0x1c(%ebp),%ebx
   0x00000016 <+22>:	mov    0x20(%ebp),%ecx
   0x00000019 <+25>:	fstpl  -0x28(%ebp)
   0x0000001c <+28>:	ja     0x4a <mp_format_float+74>
   0x0000001e <+30>:	cmpl   $0x1,0x14(%ebp)
   0x00000022 <+34>:	jbe    0x30 <mp_format_float+48>
   0x00000024 <+36>:	mov    0x10(%ebp),%eax
   0x00000027 <+39>:	movb   $0x3f,(%eax)
   0x0000002a <+42>:	inc    %eax
   0x0000002b <+43>:	mov    %eax,0x10(%ebp)
   0x0000002e <+46>:	jmp    0x36 <mp_format_float+54>
   0x00000030 <+48>:	cmpl   $0x0,0x14(%ebp)
   0x00000034 <+52>:	je     0x3c <mp_format_float+60>
   0x00000036 <+54>:	mov    0x10(%ebp),%eax
   0x00000039 <+57>:	movb   $0x0,(%eax)
   0x0000003c <+60>:	xor    %eax,%eax
   0x0000003e <+62>:	cmpl   $0x1,0x14(%ebp)
   0x00000042 <+66>:	seta   %al
   0x00000045 <+69>:	jmp    0x5b1 <mp_format_float+1457>
   0x0000004a <+74>:	fldl   -0x28(%ebp)
   0x0000004d <+77>:	fxam   
   0x0000004f <+79>:	fstsw  %ax
   0x00000052 <+82>:	test   $0x2,%ah
   0x00000055 <+85>:	je     0x76 <mp_format_float+118>
   0x00000057 <+87>:	fxam   
   0x00000059 <+89>:	fstsw  %ax
   0x0000005c <+92>:	and    $0x4500,%ax
   0x00000060 <+96>:	cmp    $0x100,%ax
   0x00000064 <+100>:	je     0x7a <mp_format_float+122>
   0x00000066 <+102>:	mov    0x10(%ebp),%eax
   0x00000069 <+105>:	fchs   
   0x0000006b <+107>:	movb   $0x2d,(%eax)
   0x0000006e <+110>:	lea    0x1(%eax),%esi
   0x00000071 <+113>:	fstpl  -0x28(%ebp)
   0x00000074 <+116>:	jmp    0x8a <mp_format_float+138>
   0x00000076 <+118>:	fstp   %st(0)
   0x00000078 <+120>:	jmp    0x7c <mp_format_float+124>
   0x0000007a <+122>:	fstp   %st(0)
   0x0000007c <+124>:	mov    0x10(%ebp),%esi
   0x0000007f <+127>:	test   %cl,%cl
   0x00000081 <+129>:	je     0x8a <mp_format_float+138>
   0x00000083 <+131>:	mov    %esi,%eax
   0x00000085 <+133>:	lea    0x1(%esi),%esi
   0x00000088 <+136>:	mov    %cl,(%eax)
   0x0000008a <+138>:	mov    %edx,%eax
   0x0000008c <+140>:	fldl   -0x28(%ebp)
   0x0000008f <+143>:	and    $0x20,%eax
   0x00000092 <+146>:	mov    %al,-0x2a(%ebp)
   0x00000095 <+149>:	fxam   
   0x00000097 <+151>:	fstsw  %ax
   0x0000009a <+154>:	fstp   %st(0)
   0x0000009c <+156>:	and    $0x4500,%ax
   0x000000a0 <+160>:	cmp    $0x500,%ax
   0x000000a4 <+164>:	jne    0xbd <mp_format_float+189>
   0x000000a6 <+166>:	mov    -0x2a(%ebp),%bl
   0x000000a9 <+169>:	mov    -0x2a(%ebp),%cl
   0x000000ac <+172>:	lea    0x3(%esi),%eax
   0x000000af <+175>:	mov    -0x2a(%ebp),%dl
   0x000000b2 <+178>:	xor    $0x49,%ebx
   0x000000b5 <+181>:	xor    $0x4e,%ecx
   0x000000b8 <+184>:	xor    $0x46,%edx
   0x000000bb <+187>:	jmp    0xe2 <mp_format_float+226>
   0x000000bd <+189>:	fldl   -0x28(%ebp)
   0x000000c0 <+192>:	fxam   
   0x000000c2 <+194>:	fstsw  %ax
   0x000000c5 <+197>:	fstp   %st(0)
   0x000000c7 <+199>:	and    $0x4500,%ax
   0x000000cb <+203>:	cmp    $0x100,%ax
   0x000000cf <+207>:	jne    0xf2 <mp_format_float+242>
   0x000000d1 <+209>:	mov    -0x2a(%ebp),%dl
   0x000000d4 <+212>:	mov    -0x2a(%ebp),%cl
   0x000000d7 <+215>:	lea    0x3(%esi),%eax
   0x000000da <+218>:	xor    $0x4e,%edx
   0x000000dd <+221>:	xor    $0x41,%ecx
   0x000000e0 <+224>:	mov    %edx,%ebx
   0x000000e2 <+226>:	mov    %bl,(%esi)
   0x000000e4 <+228>:	mov    %cl,0x1(%esi)
   0x000000e7 <+231>:	mov    %dl,0x2(%esi)
   0x000000ea <+234>:	movb   $0x0,(%eax)
   0x000000ed <+237>:	jmp    0x5ae <mp_format_float+1454>
   0x000000f2 <+242>:	mov    %edx,%eax
   0x000000f4 <+244>:	or     $0x20,%eax
   0x000000f7 <+247>:	cmp    $0x67,%al
   0x000000f9 <+249>:	mov    %al,-0x29(%ebp)
   0x000000fc <+252>:	sete   -0x2b(%ebp)
   0x00000100 <+256>:	test   %ebx,%ebx
   0x00000102 <+258>:	js     0x120 <mp_format_float+288>
   0x00000104 <+260>:	movzbl -0x2b(%ebp),%edi
   0x00000108 <+264>:	sete   %al
   0x0000010b <+267>:	and    -0x2b(%ebp),%al
   0x0000010e <+270>:	cmovne %eax,%edi
   0x00000111 <+273>:	mov    %edi,%eax
   0x00000113 <+275>:	mov    %al,-0x2b(%ebp)
   0x00000116 <+278>:	mov    $0x1,%eax
   0x0000011b <+283>:	cmovne %eax,%ebx
   0x0000011e <+286>:	jmp    0x125 <mp_format_float+293>
   0x00000120 <+288>:	mov    $0x6,%ebx
   0x00000125 <+293>:	fldz   
   0x00000127 <+295>:	fldl   -0x28(%ebp)
   0x0000012a <+298>:	mov    0x14(%ebp),%edi
   0x0000012d <+301>:	mov    %esi,%eax
   0x0000012f <+303>:	sub    0x10(%ebp),%eax
   0x00000132 <+306>:	sub    %eax,%edi
   0x00000134 <+308>:	fucomip %st(1),%st
   0x00000136 <+310>:	fstp   %st(0)
   0x00000138 <+312>:	mov    %edi,-0x30(%ebp)
   0x0000013b <+315>:	lea    -0x1(%edi),%edi
   0x0000013e <+318>:	jp     0x187 <mp_format_float+391>
   0x00000140 <+320>:	jne    0x187 <mp_format_float+391>
   0x00000142 <+322>:	cmpb   $0x66,-0x29(%ebp)
   0x00000146 <+326>:	jne    0x15e <mp_format_float+350>
   0x00000148 <+328>:	lea    0x1(%ebx),%eax
   0x0000014b <+331>:	cmp    %edi,%eax
   0x0000014d <+333>:	jl     0x155 <mp_format_float+341>
   0x0000014f <+335>:	mov    -0x30(%ebp),%ebx
   0x00000152 <+338>:	sub    $0x3,%ebx
   0x00000155 <+341>:	lea    0x1(%ebx),%ecx
   0x00000158 <+344>:	xor    %eax,%eax
   0x0000015a <+346>:	xor    %edi,%edi
   0x0000015c <+348>:	jmp    0x17a <mp_format_float+378>
   0x0000015e <+350>:	lea    0x5(%ebx),%eax
   0x00000161 <+353>:	cmp    %edi,%eax
   0x00000163 <+355>:	jl     0x16b <mp_format_float+363>
   0x00000165 <+357>:	mov    -0x30(%ebp),%ebx
   0x00000168 <+360>:	sub    $0x7,%ebx
   0x0000016b <+363>:	xor    %edi,%edi
   0x0000016d <+365>:	xor    %eax,%eax
   0x0000016f <+367>:	xor    %ecx,%ecx
   0x00000171 <+369>:	mov    $0x2b,%dl
   0x00000173 <+371>:	cmpb   $0x65,-0x29(%ebp)
   0x00000177 <+375>:	cmove  %edx,%edi
   0x0000017a <+378>:	xor    %edx,%edx
   0x0000017c <+380>:	mov    %edx,-0x30(%ebp)
   0x0000017f <+383>:	mov    %edx,-0x20(%ebp)
   0x00000182 <+386>:	jmp    0x37a <mp_format_float+890>
   0x00000187 <+391>:	fnstcw -0x1a(%ebp)
   0x0000018a <+394>:	mov    -0x24(%ebp),%edx
   0x0000018d <+397>:	fldl   0x40
   0x00000193 <+403>:	mov    %edx,%eax
   0x00000195 <+405>:	shr    $0x14,%eax
   0x00000198 <+408>:	and    $0x7ff,%eax
   0x0000019d <+413>:	sub    $0x3ff,%eax
   0x000001a2 <+418>:	mov    %eax,-0x20(%ebp)
   0x000001a5 <+421>:	mov    -0x1a(%ebp),%ax
   0x000001a9 <+425>:	fimull -0x20(%ebp)
   0x000001ac <+428>:	or     $0xc,%ah
   0x000001af <+431>:	mov    %ax,-0x1c(%ebp)
   0x000001b3 <+435>:	fldcw  -0x1c(%ebp)
   0x000001b6 <+438>:	fistpl -0x20(%ebp)
   0x000001b9 <+441>:	fldcw  -0x1a(%ebp)
   0x000001bc <+444>:	mov    -0x20(%ebp),%eax
   0x000001bf <+447>:	fldl   -0x28(%ebp)
   0x000001c2 <+450>:	fld1   
   0x000001c4 <+452>:	fcomip %st(1),%st
   0x000001c6 <+454>:	fstp   %st(0)
   0x000001c8 <+456>:	jbe    0x289 <mp_format_float+649>
   0x000001ce <+462>:	fildl  -0x20(%ebp)
   0x000001d1 <+465>:	neg    %eax
   0x000001d3 <+467>:	mov    %eax,-0x34(%ebp)
   0x000001d6 <+470>:	fstpl  0x8(%esp)
   0x000001da <+474>:	flds   0x48
   0x000001e0 <+480>:	fstpl  (%esp)
   0x000001e3 <+483>:	call   0x1e8 <mp_format_float+488>
   0x000001e8 <+488>:	mov    -0x34(%ebp),%eax
   0x000001eb <+491>:	mov    %eax,-0x20(%ebp)
   0x000001ee <+494>:	fldl   -0x28(%ebp)
   0x000001f1 <+497>:	fxch   %st(1)
   0x000001f3 <+499>:	fcomi  %st(1),%st
   0x000001f5 <+501>:	fstp   %st(1)
   0x000001f7 <+503>:	jbe    0x21d <mp_format_float+541>
   0x000001f9 <+505>:	fstp   %st(0)
   0x000001fb <+507>:	incl   -0x20(%ebp)
   0x000001fe <+510>:	mov    -0x20(%ebp),%eax
   0x00000201 <+513>:	neg    %eax
   0x00000203 <+515>:	mov    %eax,-0x34(%ebp)
   0x00000206 <+518>:	fildl  -0x34(%ebp)
   0x00000209 <+521>:	fstpl  0x8(%esp)
   0x0000020d <+525>:	flds   0x48
   0x00000213 <+531>:	fstpl  (%esp)
   0x00000216 <+534>:	call   0x21b <mp_format_float+539>
   0x0000021b <+539>:	jmp    0x1ee <mp_format_float+494>
   0x0000021d <+541>:	cmpb   $0x66,-0x29(%ebp)
   0x00000221 <+545>:	je     0x23a <mp_format_float+570>
   0x00000223 <+547>:	cmpl   $0x4,-0x20(%ebp)
   0x00000227 <+551>:	jg     0x25a <mp_format_float+602>
   0x00000229 <+553>:	cmpb   $0x0,-0x2b(%ebp)
   0x0000022d <+557>:	je     0x25a <mp_format_float+602>
   0x0000022f <+559>:	fstp   %st(0)
   0x00000231 <+561>:	mov    -0x20(%ebp),%eax
   0x00000234 <+564>:	lea    -0x1(%eax,%ebx,1),%ebx
   0x00000238 <+568>:	jmp    0x23c <mp_format_float+572>
   0x0000023a <+570>:	fstp   %st(0)
   0x0000023c <+572>:	lea    0x1(%ebx),%eax
   0x0000023f <+575>:	cmp    %edi,%eax
   0x00000241 <+577>:	jl     0x249 <mp_format_float+585>
   0x00000243 <+579>:	mov    -0x30(%ebp),%ebx
   0x00000246 <+582>:	sub    $0x3,%ebx
   0x00000249 <+585>:	xor    %edx,%edx
   0x0000024b <+587>:	lea    0x1(%ebx),%ecx
   0x0000024e <+590>:	xor    %eax,%eax
   0x00000250 <+592>:	xor    %edi,%edi
   0x00000252 <+594>:	mov    %edx,-0x30(%ebp)
   0x00000255 <+597>:	jmp    0x350 <mp_format_float+848>
   0x0000025a <+602>:	mov    -0x30(%ebp),%eax
   0x0000025d <+605>:	sub    $0x7,%eax
   0x00000260 <+608>:	cmp    %ebx,%eax
   0x00000262 <+610>:	jg     0x272 <mp_format_float+626>
   0x00000264 <+612>:	cmpb   $0x67,-0x29(%ebp)
   0x00000268 <+616>:	mov    %eax,%ebx
   0x0000026a <+618>:	je     0x272 <mp_format_float+626>
   0x0000026c <+620>:	mov    -0x30(%ebp),%ebx
   0x0000026f <+623>:	sub    $0x8,%ebx
   0x00000272 <+626>:	fdivrl -0x28(%ebp)
   0x00000275 <+629>:	mov    -0x20(%ebp),%eax
   0x00000278 <+632>:	xor    %ecx,%ecx
   0x0000027a <+634>:	mov    $0x2d,%edi
   0x0000027f <+639>:	neg    %eax
   0x00000281 <+641>:	fstpl  -0x28(%ebp)
   0x00000284 <+644>:	jmp    0x375 <mp_format_float+885>
   0x00000289 <+649>:	inc    %eax
   0x0000028a <+650>:	mov    %eax,-0x34(%ebp)
   0x0000028d <+653>:	fildl  -0x34(%ebp)
   0x00000290 <+656>:	fstpl  0x8(%esp)
   0x00000294 <+660>:	flds   0x48
   0x0000029a <+666>:	fstpl  (%esp)
   0x0000029d <+669>:	call   0x2a2 <mp_format_float+674>
   0x000002a2 <+674>:	fldl   -0x28(%ebp)
   0x000002a5 <+677>:	fcomip %st(1),%st
   0x000002a7 <+679>:	fstp   %st(0)
   0x000002a9 <+681>:	jb     0x2d8 <mp_format_float+728>
   0x000002ab <+683>:	mov    -0x20(%ebp),%eax
   0x000002ae <+686>:	inc    %eax
   0x000002af <+687>:	mov    %eax,-0x34(%ebp)
   0x000002b2 <+690>:	mov    -0x20(%ebp),%eax
   0x000002b5 <+693>:	add    $0x2,%eax
   0x000002b8 <+696>:	mov    %eax,-0x20(%ebp)
   0x000002bb <+699>:	fildl  -0x20(%ebp)
   0x000002be <+702>:	fstpl  0x8(%esp)
   0x000002c2 <+706>:	flds   0x48
   0x000002c8 <+712>:	fstpl  (%esp)
   0x000002cb <+715>:	call   0x2d0 <mp_format_float+720>
   0x000002d0 <+720>:	mov    -0x34(%ebp),%eax
   0x000002d3 <+723>:	mov    %eax,-0x20(%ebp)
   0x000002d6 <+726>:	jmp    0x2a2 <mp_format_float+674>
   0x000002d8 <+728>:	cmpb   $0x66,-0x29(%ebp)
   0x000002dc <+732>:	jne    0x303 <mp_format_float+771>
   0x000002de <+734>:	cmp    %edi,-0x20(%ebp)
   0x000002e1 <+737>:	jge    0x311 <mp_format_float+785>
   0x000002e3 <+739>:	mov    -0x20(%ebp),%eax
   0x000002e6 <+742>:	lea    0x1(%eax,%ebx,1),%eax
   0x000002ea <+746>:	cmp    %edi,%eax
   0x000002ec <+748>:	jl     0x344 <mp_format_float+836>
   0x000002ee <+750>:	mov    %edi,%eax
   0x000002f0 <+752>:	mov    -0x20(%ebp),%edi
   0x000002f3 <+755>:	sub    %edi,%eax
   0x000002f5 <+757>:	lea    -0x2(%eax),%ebx
   0x000002f8 <+760>:	dec    %eax
   0x000002f9 <+761>:	mov    $0x0,%eax
   0x000002fe <+766>:	cmove  %eax,%ebx
   0x00000301 <+769>:	jmp    0x344 <mp_format_float+836>
   0x00000303 <+771>:	cmpb   $0x65,-0x29(%ebp)
   0x00000307 <+775>:	je     0x311 <mp_format_float+785>
   0x00000309 <+777>:	cmpb   $0x67,-0x29(%ebp)
   0x0000030d <+781>:	je     0x323 <mp_format_float+803>
   0x0000030f <+783>:	jmp    0x36b <mp_format_float+875>
   0x00000311 <+785>:	mov    -0x30(%ebp),%eax
   0x00000314 <+788>:	sub    $0x7,%eax
   0x00000317 <+791>:	cmp    %ebx,%eax
   0x00000319 <+793>:	jg     0x356 <mp_format_float+854>
   0x0000031b <+795>:	mov    -0x30(%ebp),%ebx
   0x0000031e <+798>:	sub    $0x8,%ebx
   0x00000321 <+801>:	jmp    0x356 <mp_format_float+854>
   0x00000323 <+803>:	lea    0x5(%ebx),%eax
   0x00000326 <+806>:	cmp    %edi,%eax
   0x00000328 <+808>:	jl     0x337 <mp_format_float+823>
   0x0000032a <+810>:	mov    -0x30(%ebp),%ebx
   0x0000032d <+813>:	sub    $0x7,%ebx
   0x00000330 <+816>:	cmp    %ebx,-0x20(%ebp)
   0x00000333 <+819>:	jl     0x33e <mp_format_float+830>
   0x00000335 <+821>:	jmp    0x36b <mp_format_float+875>
   0x00000337 <+823>:	mov    -0x20(%ebp),%eax
   0x0000033a <+826>:	cmp    %eax,%ebx
   0x0000033c <+828>:	jle    0x36b <mp_format_float+875>
   0x0000033e <+830>:	mov    -0x20(%ebp),%eax
   0x00000341 <+833>:	inc    %eax
   0x00000342 <+834>:	sub    %eax,%ebx
   0x00000344 <+836>:	mov    -0x20(%ebp),%eax
   0x00000347 <+839>:	xor    %edi,%edi
   0x00000349 <+841>:	mov    %eax,-0x30(%ebp)
   0x0000034c <+844>:	lea    0x1(%eax,%ebx,1),%ecx
   0x00000350 <+848>:	movb   $0x66,-0x29(%ebp)
   0x00000354 <+852>:	jmp    0x37a <mp_format_float+890>
   0x00000356 <+854>:	xor    %edx,%edx
   0x00000358 <+856>:	movb   $0x65,-0x29(%ebp)
   0x0000035c <+860>:	mov    -0x20(%ebp),%eax
   0x0000035f <+863>:	xor    %ecx,%ecx
   0x00000361 <+865>:	mov    %edx,-0x30(%ebp)
   0x00000364 <+868>:	mov    $0x2b,%edi
   0x00000369 <+873>:	jmp    0x37a <mp_format_float+890>
   0x0000036b <+875>:	mov    -0x20(%ebp),%eax
   0x0000036e <+878>:	xor    %ecx,%ecx
   0x00000370 <+880>:	mov    $0x2b,%edi
   0x00000375 <+885>:	xor    %edx,%edx
   0x00000377 <+887>:	mov    %edx,-0x30(%ebp)
   0x0000037a <+890>:	xor    %edx,%edx
   0x0000037c <+892>:	test   %ebx,%ebx
   0x0000037e <+894>:	cmovns %ebx,%edx
   0x00000381 <+897>:	cmpb   $0x65,-0x29(%ebp)
   0x00000385 <+901>:	mov    %edx,-0x38(%ebp)
   0x00000388 <+904>:	jne    0x38f <mp_format_float+911>
   0x0000038a <+906>:	mov    %edx,%ecx
   0x0000038c <+908>:	inc    %ecx
   0x0000038d <+909>:	jmp    0x3a3 <mp_format_float+931>
   0x0000038f <+911>:	cmpb   $0x67,-0x29(%ebp)
   0x00000393 <+915>:	jne    0x3a3 <mp_format_float+931>
   0x00000395 <+917>:	test   %ebx,%ebx
   0x00000397 <+919>:	mov    $0x1,%ecx
   0x0000039c <+924>:	cmovg  -0x38(%ebp),%ecx
   0x000003a0 <+928>:	mov    %ecx,-0x38(%ebp)
   0x000003a3 <+931>:	xor    %ebx,%ebx
   0x000003a5 <+933>:	mov    %eax,-0x3c(%ebp)
   0x000003a8 <+936>:	mov    %ebx,-0x34(%ebp)
   0x000003ab <+939>:	test   %ecx,%ecx
   0x000003ad <+941>:	js     0x447 <mp_format_float+1095>
   0x000003b3 <+947>:	cmpl   $0x0,-0x3c(%ebp)
   0x000003b7 <+951>:	fld1   
   0x000003b9 <+953>:	jle    0x3de <mp_format_float+990>
   0x000003bb <+955>:	fstp   %st(0)
   0x000003bd <+957>:	fildl  -0x3c(%ebp)
   0x000003c0 <+960>:	mov    %eax,-0x40(%ebp)
   0x000003c3 <+963>:	mov    %ecx,-0x34(%ebp)
   0x000003c6 <+966>:	fstpl  0x8(%esp)
   0x000003ca <+970>:	flds   0x48
   0x000003d0 <+976>:	fstpl  (%esp)
   0x000003d3 <+979>:	call   0x3d8 <mp_format_float+984>
   0x000003d8 <+984>:	mov    -0x40(%ebp),%eax
   0x000003db <+987>:	mov    -0x34(%ebp),%ecx
   0x000003de <+990>:	xor    %edx,%edx
   0x000003e0 <+992>:	mov    %edx,-0x34(%ebp)
   0x000003e3 <+995>:	fldl   -0x28(%ebp)
   0x000003e6 <+998>:	fxch   %st(1)
   0x000003e8 <+1000>:	fcomi  %st(1),%st
   0x000003ea <+1002>:	ja     0x400 <mp_format_float+1024>
   0x000003ec <+1004>:	incl   -0x34(%ebp)
   0x000003ef <+1007>:	cmpl   $0x9,-0x34(%ebp)
   0x000003f3 <+1011>:	fsubr  %st,%st(1)
   0x000003f5 <+1013>:	fxch   %st(1)
   0x000003f7 <+1015>:	fstpl  -0x28(%ebp)
   0x000003fa <+1018>:	jne    0x3e3 <mp_format_float+995>
   0x000003fc <+1020>:	fstp   %st(0)
   0x000003fe <+1022>:	jmp    0x404 <mp_format_float+1028>
   0x00000400 <+1024>:	fstp   %st(0)
   0x00000402 <+1026>:	fstp   %st(0)
   0x00000404 <+1028>:	test   %ecx,%ecx
   0x00000406 <+1030>:	je     0x42c <mp_format_float+1068>
   0x00000408 <+1032>:	mov    -0x34(%ebp),%dl
   0x0000040b <+1035>:	lea    0x30(%edx),%ebx
   0x0000040e <+1038>:	mov    -0x30(%ebp),%edx
   0x00000411 <+1041>:	mov    %bl,(%esi)
   0x00000413 <+1043>:	sub    %eax,%edx
   0x00000415 <+1045>:	mov    %edx,%ebx
   0x00000417 <+1047>:	add    -0x3c(%ebp),%ebx
   0x0000041a <+1050>:	jne    0x422 <mp_format_float+1058>
   0x0000041c <+1052>:	cmpl   $0x0,-0x38(%ebp)
   0x00000420 <+1056>:	jg     0x425 <mp_format_float+1061>
   0x00000422 <+1058>:	inc    %esi
   0x00000423 <+1059>:	jmp    0x42c <mp_format_float+1068>
   0x00000425 <+1061>:	movb   $0x2e,0x1(%esi)
   0x00000429 <+1065>:	add    $0x2,%esi
   0x0000042c <+1068>:	dec    %ecx
   0x0000042d <+1069>:	cmpl   $0x0,-0x3c(%ebp)
   0x00000431 <+1073>:	jg     0x43f <mp_format_float+1087>
   0x00000433 <+1075>:	fldl   -0x28(%ebp)
   0x00000436 <+1078>:	fmuls  0x48
   0x0000043c <+1084>:	fstpl  -0x28(%ebp)
   0x0000043f <+1087>:	decl   -0x3c(%ebp)
   0x00000442 <+1090>:	jmp    0x3ab <mp_format_float+939>
   0x00000447 <+1095>:	cmpl   $0x4,-0x34(%ebp)
   0x0000044b <+1099>:	lea    -0x1(%esi),%eax
   0x0000044e <+1102>:	jle    0x4f0 <mp_format_float+1264>
   0x00000454 <+1108>:	mov    (%eax),%dl
   0x00000456 <+1110>:	mov    %eax,%ebx
   0x00000458 <+1112>:	cmp    $0x2e,%dl
   0x0000045b <+1115>:	jne    0x460 <mp_format_float+1120>
   0x0000045d <+1117>:	dec    %eax
   0x0000045e <+1118>:	jmp    0x454 <mp_format_float+1108>
   0x00000460 <+1120>:	lea    -0x30(%edx),%ecx
   0x00000463 <+1123>:	cmp    $0x9,%cl
   0x00000466 <+1126>:	jbe    0x46b <mp_format_float+1131>
   0x00000468 <+1128>:	inc    %ebx
   0x00000469 <+1129>:	jmp    0x47d <mp_format_float+1149>
   0x0000046b <+1131>:	cmp    $0x39,%dl
   0x0000046e <+1134>:	je     0x475 <mp_format_float+1141>
   0x00000470 <+1136>:	inc    %edx
   0x00000471 <+1137>:	mov    %dl,(%eax)
   0x00000473 <+1139>:	jmp    0x47d <mp_format_float+1149>
   0x00000475 <+1141>:	movb   $0x30,(%eax)
   0x00000478 <+1144>:	cmp    0x10(%ebp),%eax
   0x0000047b <+1147>:	jne    0x45d <mp_format_float+1117>
   0x0000047d <+1149>:	cmpb   $0x30,(%ebx)
   0x00000480 <+1152>:	jne    0x4f0 <mp_format_float+1264>
   0x00000482 <+1154>:	cmpb   $0x2e,0x1(%ebx)
   0x00000486 <+1158>:	jne    0x4a8 <mp_format_float+1192>
   0x00000488 <+1160>:	cmpb   $0x66,-0x29(%ebp)
   0x0000048c <+1164>:	je     0x4a8 <mp_format_float+1192>
   0x0000048e <+1166>:	mov    %edi,%eax
   0x00000490 <+1168>:	movw   $0x302e,(%ebx)
   0x00000495 <+1173>:	cmp    $0x2d,%al
   0x00000497 <+1175>:	jne    0x4a3 <mp_format_float+1187>
   0x00000499 <+1177>:	decl   -0x20(%ebp)
   0x0000049c <+1180>:	mov    $0x2b,%al
   0x0000049e <+1182>:	cmove  %eax,%edi
   0x000004a1 <+1185>:	jmp    0x4b6 <mp_format_float+1206>
   0x000004a3 <+1187>:	incl   -0x20(%ebp)
   0x000004a6 <+1190>:	jmp    0x4b6 <mp_format_float+1206>
   0x000004a8 <+1192>:	lea    0x1(%esi),%eax
   0x000004ab <+1195>:	mov    %eax,%edx
   0x000004ad <+1197>:	sub    0x10(%ebp),%edx
   0x000004b0 <+1200>:	cmp    0x14(%ebp),%edx
   0x000004b3 <+1203>:	cmovb  %eax,%esi
   0x000004b6 <+1206>:	mov    %esi,%eax
   0x000004b8 <+1208>:	xor    %edx,%edx
   0x000004ba <+1210>:	sub    %ebx,%eax
   0x000004bc <+1212>:	cmp    %ebx,%esi
   0x000004be <+1214>:	cmovb  %edx,%eax
   0x000004c1 <+1217>:	mov    %eax,0x8(%esp)
   0x000004c5 <+1221>:	mov    %ebx,%eax
   0x000004c7 <+1223>:	sub    %esi,%eax
   0x000004c9 <+1225>:	cmp    %ebx,%esi
   0x000004cb <+1227>:	cmovb  %edx,%eax
   0x000004ce <+1230>:	mov    $0x1,%edx
   0x000004d3 <+1235>:	add    %esi,%eax
   0x000004d5 <+1237>:	mov    %eax,0x4(%esp)
   0x000004d9 <+1241>:	lea    0x1(%ebx),%eax
   0x000004dc <+1244>:	sub    %esi,%eax
   0x000004de <+1246>:	cmp    %ebx,%esi
   0x000004e0 <+1248>:	cmovb  %edx,%eax
   0x000004e3 <+1251>:	add    %esi,%eax
   0x000004e5 <+1253>:	mov    %eax,(%esp)
   0x000004e8 <+1256>:	call   0x4ed <mp_format_float+1261>
   0x000004ed <+1261>:	movb   $0x31,(%ebx)
   0x000004f0 <+1264>:	lea    0x1(%esi),%eax
   0x000004f3 <+1267>:	sub    0x10(%ebp),%eax
   0x000004f6 <+1270>:	cmp    %eax,0x14(%ebp)
   0x000004f9 <+1273>:	jae    0x518 <mp_format_float+1304>
   0x000004fb <+1275>:	movl   $0x193,0x8(%esp)
   0x00000503 <+1283>:	movl   $0x0,0x4(%esp)
   0x0000050b <+1291>:	movl   $0x17,(%esp)
   0x00000512 <+1298>:	call   *0x0
   0x00000518 <+1304>:	cmpl   $0x0,-0x38(%ebp)
   0x0000051c <+1308>:	jle    0x533 <mp_format_float+1331>
   0x0000051e <+1310>:	cmpb   $0x0,-0x2b(%ebp)
   0x00000522 <+1314>:	je     0x533 <mp_format_float+1331>
   0x00000524 <+1316>:	mov    -0x1(%esi),%al
   0x00000527 <+1319>:	cmp    $0x30,%al
   0x00000529 <+1321>:	jne    0x52e <mp_format_float+1326>
   0x0000052b <+1323>:	dec    %esi
   0x0000052c <+1324>:	jmp    0x524 <mp_format_float+1316>
   0x0000052e <+1326>:	cmp    $0x2e,%al
   0x00000530 <+1328>:	jne    0x533 <mp_format_float+1331>
   0x00000532 <+1330>:	dec    %esi
   0x00000533 <+1331>:	mov    %edi,%eax
   0x00000535 <+1333>:	test   %al,%al
   0x00000537 <+1335>:	je     0x581 <mp_format_float+1409>
   0x00000539 <+1337>:	mov    -0x2a(%ebp),%al
   0x0000053c <+1340>:	lea    0x2(%esi),%ecx
   0x0000053f <+1343>:	or     $0x45,%eax
   0x00000542 <+1346>:	cmpl   $0x63,-0x20(%ebp)
   0x00000546 <+1350>:	mov    %al,(%esi)
   0x00000548 <+1352>:	mov    %edi,%eax
   0x0000054a <+1354>:	mov    %al,0x1(%esi)
   0x0000054d <+1357>:	jle    0x563 <mp_format_float+1379>
   0x0000054f <+1359>:	mov    -0x20(%ebp),%eax
   0x00000552 <+1362>:	mov    $0x64,%ebx
   0x00000557 <+1367>:	lea    0x3(%esi),%ecx
   0x0000055a <+1370>:	cltd   
   0x0000055b <+1371>:	idiv   %ebx
   0x0000055d <+1373>:	add    $0x30,%eax
   0x00000560 <+1376>:	mov    %al,0x2(%esi)
   0x00000563 <+1379>:	mov    -0x20(%ebp),%eax
   0x00000566 <+1382>:	mov    $0xa,%ebx
   0x0000056b <+1387>:	lea    0x2(%ecx),%esi
   0x0000056e <+1390>:	cltd   
   0x0000056f <+1391>:	idiv   %ebx
   0x00000571 <+1393>:	mov    %edx,%edi
   0x00000573 <+1395>:	cltd   
   0x00000574 <+1396>:	idiv   %ebx
   0x00000576 <+1398>:	lea    0x30(%edi),%eax
   0x00000579 <+1401>:	mov    %al,0x1(%ecx)
   0x0000057c <+1404>:	add    $0x30,%edx
   0x0000057f <+1407>:	mov    %dl,(%ecx)
   0x00000581 <+1409>:	lea    0x1(%esi),%eax
   0x00000584 <+1412>:	movb   $0x0,(%esi)
   0x00000587 <+1415>:	sub    0x10(%ebp),%eax
   0x0000058a <+1418>:	cmp    %eax,0x14(%ebp)
   0x0000058d <+1421>:	jae    0x5ac <mp_format_float+1452>
   0x0000058f <+1423>:	movl   $0x1ab,0x8(%esp)
   0x00000597 <+1431>:	movl   $0x0,0x4(%esp)
   0x0000059f <+1439>:	movl   $0x17,(%esp)
   0x000005a6 <+1446>:	call   *0x0
   0x000005ac <+1452>:	mov    %esi,%eax
   0x000005ae <+1454>:	sub    0x10(%ebp),%eax
   0x000005b1 <+1457>:	add    $0x4c,%esp
   0x000005b4 <+1460>:	pop    %ebx
   0x000005b5 <+1461>:	pop    %esi
   0x000005b6 <+1462>:	pop    %edi
   0x000005b7 <+1463>:	pop    %ebp
   0x000005b8 <+1464>:	ret    
End of assembler dump.
After (using `powl()`):
Dump of assembler code for function mp_format_float:
   0x00000000 <+0>:	push   %ebp
   0x00000001 <+1>:	mov    %esp,%ebp
   0x00000003 <+3>:	push   %edi
   0x00000004 <+4>:	push   %esi
   0x00000005 <+5>:	push   %ebx
   0x00000006 <+6>:	sub    $0x6c,%esp
   0x00000009 <+9>:	fldl   0x8(%ebp)
   0x0000000c <+12>:	cmpl   $0x7,0x14(%ebp)
   0x00000010 <+16>:	mov    0x18(%ebp),%edx
   0x00000013 <+19>:	mov    0x1c(%ebp),%ebx
   0x00000016 <+22>:	mov    0x20(%ebp),%ecx
   0x00000019 <+25>:	fstpl  -0x30(%ebp)
   0x0000001c <+28>:	ja     0x4a <mp_format_float+74>
   0x0000001e <+30>:	cmpl   $0x1,0x14(%ebp)
   0x00000022 <+34>:	jbe    0x30 <mp_format_float+48>
   0x00000024 <+36>:	mov    0x10(%ebp),%eax
   0x00000027 <+39>:	movb   $0x3f,(%eax)
   0x0000002a <+42>:	inc    %eax
   0x0000002b <+43>:	mov    %eax,0x10(%ebp)
   0x0000002e <+46>:	jmp    0x36 <mp_format_float+54>
   0x00000030 <+48>:	cmpl   $0x0,0x14(%ebp)
   0x00000034 <+52>:	je     0x3c <mp_format_float+60>
   0x00000036 <+54>:	mov    0x10(%ebp),%eax
   0x00000039 <+57>:	movb   $0x0,(%eax)
   0x0000003c <+60>:	xor    %eax,%eax
   0x0000003e <+62>:	cmpl   $0x1,0x14(%ebp)
   0x00000042 <+66>:	seta   %al
   0x00000045 <+69>:	jmp    0x5cf <mp_format_float+1487>
   0x0000004a <+74>:	fldl   -0x30(%ebp)
   0x0000004d <+77>:	fxam   
   0x0000004f <+79>:	fstsw  %ax
   0x00000052 <+82>:	test   $0x2,%ah
   0x00000055 <+85>:	je     0x76 <mp_format_float+118>
   0x00000057 <+87>:	fxam   
   0x00000059 <+89>:	fstsw  %ax
   0x0000005c <+92>:	and    $0x4500,%ax
   0x00000060 <+96>:	cmp    $0x100,%ax
   0x00000064 <+100>:	je     0x7a <mp_format_float+122>
   0x00000066 <+102>:	mov    0x10(%ebp),%eax
   0x00000069 <+105>:	fchs   
   0x0000006b <+107>:	movb   $0x2d,(%eax)
   0x0000006e <+110>:	lea    0x1(%eax),%esi
   0x00000071 <+113>:	fstpl  -0x30(%ebp)
   0x00000074 <+116>:	jmp    0x8a <mp_format_float+138>
   0x00000076 <+118>:	fstp   %st(0)
   0x00000078 <+120>:	jmp    0x7c <mp_format_float+124>
   0x0000007a <+122>:	fstp   %st(0)
   0x0000007c <+124>:	mov    0x10(%ebp),%esi
   0x0000007f <+127>:	test   %cl,%cl
   0x00000081 <+129>:	je     0x8a <mp_format_float+138>
   0x00000083 <+131>:	mov    %esi,%eax
   0x00000085 <+133>:	lea    0x1(%esi),%esi
   0x00000088 <+136>:	mov    %cl,(%eax)
   0x0000008a <+138>:	mov    %edx,%eax
   0x0000008c <+140>:	fldl   -0x30(%ebp)
   0x0000008f <+143>:	and    $0x20,%eax
   0x00000092 <+146>:	mov    %al,-0x1e(%ebp)
   0x00000095 <+149>:	fxam   
   0x00000097 <+151>:	fstsw  %ax
   0x0000009a <+154>:	fstp   %st(0)
   0x0000009c <+156>:	and    $0x4500,%ax
   0x000000a0 <+160>:	cmp    $0x500,%ax
   0x000000a4 <+164>:	jne    0xbd <mp_format_float+189>
   0x000000a6 <+166>:	mov    -0x1e(%ebp),%bl
   0x000000a9 <+169>:	mov    -0x1e(%ebp),%cl
   0x000000ac <+172>:	lea    0x3(%esi),%eax
   0x000000af <+175>:	mov    -0x1e(%ebp),%dl
   0x000000b2 <+178>:	xor    $0x49,%ebx
   0x000000b5 <+181>:	xor    $0x4e,%ecx
   0x000000b8 <+184>:	xor    $0x46,%edx
   0x000000bb <+187>:	jmp    0xe2 <mp_format_float+226>
   0x000000bd <+189>:	fldl   -0x30(%ebp)
   0x000000c0 <+192>:	fxam   
   0x000000c2 <+194>:	fstsw  %ax
   0x000000c5 <+197>:	fstp   %st(0)
   0x000000c7 <+199>:	and    $0x4500,%ax
   0x000000cb <+203>:	cmp    $0x100,%ax
   0x000000cf <+207>:	jne    0xf2 <mp_format_float+242>
   0x000000d1 <+209>:	mov    -0x1e(%ebp),%dl
   0x000000d4 <+212>:	mov    -0x1e(%ebp),%cl
   0x000000d7 <+215>:	lea    0x3(%esi),%eax
   0x000000da <+218>:	xor    $0x4e,%edx
   0x000000dd <+221>:	xor    $0x41,%ecx
   0x000000e0 <+224>:	mov    %edx,%ebx
   0x000000e2 <+226>:	mov    %bl,(%esi)
   0x000000e4 <+228>:	mov    %cl,0x1(%esi)
   0x000000e7 <+231>:	mov    %dl,0x2(%esi)
   0x000000ea <+234>:	movb   $0x0,(%eax)
   0x000000ed <+237>:	jmp    0x5cc <mp_format_float+1484>
   0x000000f2 <+242>:	mov    %edx,%eax
   0x000000f4 <+244>:	or     $0x20,%eax
   0x000000f7 <+247>:	cmp    $0x67,%al
   0x000000f9 <+249>:	mov    %al,-0x1d(%ebp)
   0x000000fc <+252>:	sete   -0x1f(%ebp)
   0x00000100 <+256>:	test   %ebx,%ebx
   0x00000102 <+258>:	js     0x120 <mp_format_float+288>
   0x00000104 <+260>:	movzbl -0x1f(%ebp),%edi
   0x00000108 <+264>:	sete   %al
   0x0000010b <+267>:	and    -0x1f(%ebp),%al
   0x0000010e <+270>:	cmovne %eax,%edi
   0x00000111 <+273>:	mov    %edi,%eax
   0x00000113 <+275>:	mov    %al,-0x1f(%ebp)
   0x00000116 <+278>:	mov    $0x1,%eax
   0x0000011b <+283>:	cmovne %eax,%ebx
   0x0000011e <+286>:	jmp    0x125 <mp_format_float+293>
   0x00000120 <+288>:	mov    $0x6,%ebx
   0x00000125 <+293>:	fldz   
   0x00000127 <+295>:	fldl   -0x30(%ebp)
   0x0000012a <+298>:	mov    0x14(%ebp),%edi
   0x0000012d <+301>:	mov    %esi,%eax
   0x0000012f <+303>:	sub    0x10(%ebp),%eax
   0x00000132 <+306>:	sub    %eax,%edi
   0x00000134 <+308>:	fucomip %st(1),%st
   0x00000136 <+310>:	fstp   %st(0)
   0x00000138 <+312>:	mov    %edi,-0x34(%ebp)
   0x0000013b <+315>:	lea    -0x1(%edi),%edi
   0x0000013e <+318>:	jp     0x187 <mp_format_float+391>
   0x00000140 <+320>:	jne    0x187 <mp_format_float+391>
   0x00000142 <+322>:	cmpb   $0x66,-0x1d(%ebp)
   0x00000146 <+326>:	jne    0x15e <mp_format_float+350>
   0x00000148 <+328>:	lea    0x1(%ebx),%eax
   0x0000014b <+331>:	cmp    %edi,%eax
   0x0000014d <+333>:	jl     0x155 <mp_format_float+341>
   0x0000014f <+335>:	mov    -0x34(%ebp),%ebx
   0x00000152 <+338>:	sub    $0x3,%ebx
   0x00000155 <+341>:	lea    0x1(%ebx),%ecx
   0x00000158 <+344>:	xor    %eax,%eax
   0x0000015a <+346>:	xor    %edi,%edi
   0x0000015c <+348>:	jmp    0x17a <mp_format_float+378>
   0x0000015e <+350>:	lea    0x5(%ebx),%eax
   0x00000161 <+353>:	cmp    %edi,%eax
   0x00000163 <+355>:	jl     0x16b <mp_format_float+363>
   0x00000165 <+357>:	mov    -0x34(%ebp),%ebx
   0x00000168 <+360>:	sub    $0x7,%ebx
   0x0000016b <+363>:	xor    %edi,%edi
   0x0000016d <+365>:	xor    %eax,%eax
   0x0000016f <+367>:	xor    %ecx,%ecx
   0x00000171 <+369>:	mov    $0x2b,%dl
   0x00000173 <+371>:	cmpb   $0x65,-0x1d(%ebp)
   0x00000177 <+375>:	cmove  %edx,%edi
   0x0000017a <+378>:	xor    %edx,%edx
   0x0000017c <+380>:	mov    %edx,-0x34(%ebp)
   0x0000017f <+383>:	mov    %edx,-0x28(%ebp)
   0x00000182 <+386>:	jmp    0x392 <mp_format_float+914>
   0x00000187 <+391>:	fnstcw -0x1a(%ebp)
   0x0000018a <+394>:	mov    -0x2c(%ebp),%edx
   0x0000018d <+397>:	fldl   0x40
   0x00000193 <+403>:	mov    %edx,%eax
   0x00000195 <+405>:	shr    $0x14,%eax
   0x00000198 <+408>:	and    $0x7ff,%eax
   0x0000019d <+413>:	sub    $0x3ff,%eax
   0x000001a2 <+418>:	mov    %eax,-0x28(%ebp)
   0x000001a5 <+421>:	mov    -0x1a(%ebp),%ax
   0x000001a9 <+425>:	fimull -0x28(%ebp)
   0x000001ac <+428>:	or     $0xc,%ah
   0x000001af <+431>:	mov    %ax,-0x1c(%ebp)
   0x000001b3 <+435>:	fldcw  -0x1c(%ebp)
   0x000001b6 <+438>:	fistpl -0x28(%ebp)
   0x000001b9 <+441>:	fldcw  -0x1a(%ebp)
   0x000001bc <+444>:	mov    -0x28(%ebp),%eax
   0x000001bf <+447>:	fldl   -0x30(%ebp)
   0x000001c2 <+450>:	fld1   
   0x000001c4 <+452>:	fcomip %st(1),%st
   0x000001c6 <+454>:	fstp   %st(0)
   0x000001c8 <+456>:	jbe    0x295 <mp_format_float+661>
   0x000001ce <+462>:	fildl  -0x28(%ebp)
   0x000001d1 <+465>:	neg    %eax
   0x000001d3 <+467>:	mov    %eax,-0x40(%ebp)
   0x000001d6 <+470>:	fstpt  0xc(%esp)
   0x000001da <+474>:	flds   0x48
   0x000001e0 <+480>:	fstpt  (%esp)
   0x000001e3 <+483>:	call   0x1e8 <mp_format_float+488>
   0x000001e8 <+488>:	mov    -0x40(%ebp),%eax
   0x000001eb <+491>:	fstpl  -0x28(%ebp)
   0x000001ee <+494>:	fldl   -0x28(%ebp)
   0x000001f1 <+497>:	mov    %eax,-0x28(%ebp)
   0x000001f4 <+500>:	fldl   -0x30(%ebp)
   0x000001f7 <+503>:	fxch   %st(1)
   0x000001f9 <+505>:	fcomi  %st(1),%st
   0x000001fb <+507>:	fstp   %st(1)
   0x000001fd <+509>:	jbe    0x229 <mp_format_float+553>
   0x000001ff <+511>:	fstp   %st(0)
   0x00000201 <+513>:	incl   -0x28(%ebp)
   0x00000204 <+516>:	mov    -0x28(%ebp),%eax
   0x00000207 <+519>:	neg    %eax
   0x00000209 <+521>:	mov    %eax,-0x40(%ebp)
   0x0000020c <+524>:	fildl  -0x40(%ebp)
   0x0000020f <+527>:	fstpt  0xc(%esp)
   0x00000213 <+531>:	flds   0x48
   0x00000219 <+537>:	fstpt  (%esp)
   0x0000021c <+540>:	call   0x221 <mp_format_float+545>
   0x00000221 <+545>:	fstpl  -0x40(%ebp)
   0x00000224 <+548>:	fldl   -0x40(%ebp)
   0x00000227 <+551>:	jmp    0x1f4 <mp_format_float+500>
   0x00000229 <+553>:	cmpb   $0x66,-0x1d(%ebp)
   0x0000022d <+557>:	je     0x246 <mp_format_float+582>
   0x0000022f <+559>:	cmpl   $0x4,-0x28(%ebp)
   0x00000233 <+563>:	jg     0x266 <mp_format_float+614>
   0x00000235 <+565>:	cmpb   $0x0,-0x1f(%ebp)
   0x00000239 <+569>:	je     0x266 <mp_format_float+614>
   0x0000023b <+571>:	fstp   %st(0)
   0x0000023d <+573>:	mov    -0x28(%ebp),%eax
   0x00000240 <+576>:	lea    -0x1(%eax,%ebx,1),%ebx
   0x00000244 <+580>:	jmp    0x248 <mp_format_float+584>
   0x00000246 <+582>:	fstp   %st(0)
   0x00000248 <+584>:	lea    0x1(%ebx),%eax
   0x0000024b <+587>:	cmp    %edi,%eax
   0x0000024d <+589>:	jl     0x255 <mp_format_float+597>
   0x0000024f <+591>:	mov    -0x34(%ebp),%ebx
   0x00000252 <+594>:	sub    $0x3,%ebx
   0x00000255 <+597>:	xor    %edx,%edx
   0x00000257 <+599>:	lea    0x1(%ebx),%ecx
   0x0000025a <+602>:	xor    %eax,%eax
   0x0000025c <+604>:	xor    %edi,%edi
   0x0000025e <+606>:	mov    %edx,-0x34(%ebp)
   0x00000261 <+609>:	jmp    0x368 <mp_format_float+872>
   0x00000266 <+614>:	mov    -0x34(%ebp),%eax
   0x00000269 <+617>:	sub    $0x7,%eax
   0x0000026c <+620>:	cmp    %ebx,%eax
   0x0000026e <+622>:	jg     0x27e <mp_format_float+638>
   0x00000270 <+624>:	cmpb   $0x67,-0x1d(%ebp)
   0x00000274 <+628>:	mov    %eax,%ebx
   0x00000276 <+630>:	je     0x27e <mp_format_float+638>
   0x00000278 <+632>:	mov    -0x34(%ebp),%ebx
   0x0000027b <+635>:	sub    $0x8,%ebx
   0x0000027e <+638>:	fdivrl -0x30(%ebp)
   0x00000281 <+641>:	mov    -0x28(%ebp),%eax
   0x00000284 <+644>:	xor    %ecx,%ecx
   0x00000286 <+646>:	mov    $0x2d,%edi
   0x0000028b <+651>:	neg    %eax
   0x0000028d <+653>:	fstpl  -0x30(%ebp)
   0x00000290 <+656>:	jmp    0x38d <mp_format_float+909>
   0x00000295 <+661>:	inc    %eax
   0x00000296 <+662>:	mov    %eax,-0x40(%ebp)
   0x00000299 <+665>:	fildl  -0x40(%ebp)
   0x0000029c <+668>:	fstpt  0xc(%esp)
   0x000002a0 <+672>:	flds   0x48
   0x000002a6 <+678>:	fstpt  (%esp)
   0x000002a9 <+681>:	call   0x2ae <mp_format_float+686>
   0x000002ae <+686>:	fstpl  -0x40(%ebp)
   0x000002b1 <+689>:	fldl   -0x40(%ebp)
   0x000002b4 <+692>:	fldl   -0x30(%ebp)
   0x000002b7 <+695>:	fcomip %st(1),%st
   0x000002b9 <+697>:	fstp   %st(0)
   0x000002bb <+699>:	jb     0x2f0 <mp_format_float+752>
   0x000002bd <+701>:	mov    -0x28(%ebp),%eax
   0x000002c0 <+704>:	inc    %eax
   0x000002c1 <+705>:	mov    %eax,-0x40(%ebp)
   0x000002c4 <+708>:	mov    -0x28(%ebp),%eax
   0x000002c7 <+711>:	add    $0x2,%eax
   0x000002ca <+714>:	mov    %eax,-0x28(%ebp)
   0x000002cd <+717>:	fildl  -0x28(%ebp)
   0x000002d0 <+720>:	fstpt  0xc(%esp)
   0x000002d4 <+724>:	flds   0x48
   0x000002da <+730>:	fstpt  (%esp)
   0x000002dd <+733>:	call   0x2e2 <mp_format_float+738>
   0x000002e2 <+738>:	mov    -0x40(%ebp),%eax
   0x000002e5 <+741>:	fstpl  -0x28(%ebp)
   0x000002e8 <+744>:	fldl   -0x28(%ebp)
   0x000002eb <+747>:	mov    %eax,-0x28(%ebp)
   0x000002ee <+750>:	jmp    0x2b4 <mp_format_float+692>
   0x000002f0 <+752>:	cmpb   $0x66,-0x1d(%ebp)
   0x000002f4 <+756>:	jne    0x31b <mp_format_float+795>
   0x000002f6 <+758>:	cmp    %edi,-0x28(%ebp)
   0x000002f9 <+761>:	jge    0x329 <mp_format_float+809>
   0x000002fb <+763>:	mov    -0x28(%ebp),%eax
   0x000002fe <+766>:	lea    0x1(%eax,%ebx,1),%eax
   0x00000302 <+770>:	cmp    %edi,%eax
   0x00000304 <+772>:	jl     0x35c <mp_format_float+860>
   0x00000306 <+774>:	mov    %edi,%eax
   0x00000308 <+776>:	mov    -0x28(%ebp),%edi
   0x0000030b <+779>:	sub    %edi,%eax
   0x0000030d <+781>:	lea    -0x2(%eax),%ebx
   0x00000310 <+784>:	dec    %eax
   0x00000311 <+785>:	mov    $0x0,%eax
   0x00000316 <+790>:	cmove  %eax,%ebx
   0x00000319 <+793>:	jmp    0x35c <mp_format_float+860>
   0x0000031b <+795>:	cmpb   $0x65,-0x1d(%ebp)
   0x0000031f <+799>:	je     0x329 <mp_format_float+809>
   0x00000321 <+801>:	cmpb   $0x67,-0x1d(%ebp)
   0x00000325 <+805>:	je     0x33b <mp_format_float+827>
   0x00000327 <+807>:	jmp    0x383 <mp_format_float+899>
   0x00000329 <+809>:	mov    -0x34(%ebp),%eax
   0x0000032c <+812>:	sub    $0x7,%eax
   0x0000032f <+815>:	cmp    %ebx,%eax
   0x00000331 <+817>:	jg     0x36e <mp_format_float+878>
   0x00000333 <+819>:	mov    -0x34(%ebp),%ebx
   0x00000336 <+822>:	sub    $0x8,%ebx
   0x00000339 <+825>:	jmp    0x36e <mp_format_float+878>
   0x0000033b <+827>:	lea    0x5(%ebx),%eax
   0x0000033e <+830>:	cmp    %edi,%eax
   0x00000340 <+832>:	jl     0x34f <mp_format_float+847>
   0x00000342 <+834>:	mov    -0x34(%ebp),%ebx
   0x00000345 <+837>:	sub    $0x7,%ebx
   0x00000348 <+840>:	cmp    %ebx,-0x28(%ebp)
   0x0000034b <+843>:	jl     0x356 <mp_format_float+854>
   0x0000034d <+845>:	jmp    0x383 <mp_format_float+899>
   0x0000034f <+847>:	mov    -0x28(%ebp),%eax
   0x00000352 <+850>:	cmp    %eax,%ebx
   0x00000354 <+852>:	jle    0x383 <mp_format_float+899>
   0x00000356 <+854>:	mov    -0x28(%ebp),%eax
   0x00000359 <+857>:	inc    %eax
   0x0000035a <+858>:	sub    %eax,%ebx
   0x0000035c <+860>:	mov    -0x28(%ebp),%eax
   0x0000035f <+863>:	xor    %edi,%edi
   0x00000361 <+865>:	mov    %eax,-0x34(%ebp)
   0x00000364 <+868>:	lea    0x1(%eax,%ebx,1),%ecx
   0x00000368 <+872>:	movb   $0x66,-0x1d(%ebp)
   0x0000036c <+876>:	jmp    0x392 <mp_format_float+914>
   0x0000036e <+878>:	xor    %edx,%edx
   0x00000370 <+880>:	movb   $0x65,-0x1d(%ebp)
   0x00000374 <+884>:	mov    -0x28(%ebp),%eax
   0x00000377 <+887>:	xor    %ecx,%ecx
   0x00000379 <+889>:	mov    %edx,-0x34(%ebp)
   0x0000037c <+892>:	mov    $0x2b,%edi
   0x00000381 <+897>:	jmp    0x392 <mp_format_float+914>
   0x00000383 <+899>:	mov    -0x28(%ebp),%eax
   0x00000386 <+902>:	xor    %ecx,%ecx
   0x00000388 <+904>:	mov    $0x2b,%edi
   0x0000038d <+909>:	xor    %edx,%edx
   0x0000038f <+911>:	mov    %edx,-0x34(%ebp)
   0x00000392 <+914>:	xor    %edx,%edx
   0x00000394 <+916>:	test   %ebx,%ebx
   0x00000396 <+918>:	cmovns %ebx,%edx
   0x00000399 <+921>:	cmpb   $0x65,-0x1d(%ebp)
   0x0000039d <+925>:	mov    %edx,-0x38(%ebp)
   0x000003a0 <+928>:	jne    0x3a7 <mp_format_float+935>
   0x000003a2 <+930>:	mov    %edx,%ecx
   0x000003a4 <+932>:	inc    %ecx
   0x000003a5 <+933>:	jmp    0x3bb <mp_format_float+955>
   0x000003a7 <+935>:	cmpb   $0x67,-0x1d(%ebp)
   0x000003ab <+939>:	jne    0x3bb <mp_format_float+955>
   0x000003ad <+941>:	test   %ebx,%ebx
   0x000003af <+943>:	mov    $0x1,%ecx
   0x000003b4 <+948>:	cmovg  -0x38(%ebp),%ecx
   0x000003b8 <+952>:	mov    %ecx,-0x38(%ebp)
   0x000003bb <+955>:	xor    %ebx,%ebx
   0x000003bd <+957>:	mov    %eax,-0x44(%ebp)
   0x000003c0 <+960>:	mov    %ebx,-0x40(%ebp)
   0x000003c3 <+963>:	test   %ecx,%ecx
   0x000003c5 <+965>:	js     0x465 <mp_format_float+1125>
   0x000003cb <+971>:	cmpl   $0x0,-0x44(%ebp)
   0x000003cf <+975>:	fld1   
   0x000003d1 <+977>:	jle    0x3fc <mp_format_float+1020>
   0x000003d3 <+979>:	fstp   %st(0)
   0x000003d5 <+981>:	fildl  -0x44(%ebp)
   0x000003d8 <+984>:	mov    %eax,-0x4c(%ebp)
   0x000003db <+987>:	mov    %ecx,-0x48(%ebp)
   0x000003de <+990>:	fstpt  0xc(%esp)
   0x000003e2 <+994>:	flds   0x48
   0x000003e8 <+1000>:	fstpt  (%esp)
   0x000003eb <+1003>:	call   0x3f0 <mp_format_float+1008>
   0x000003f0 <+1008>:	mov    -0x4c(%ebp),%eax
   0x000003f3 <+1011>:	mov    -0x48(%ebp),%ecx
   0x000003f6 <+1014>:	fstpl  -0x40(%ebp)
   0x000003f9 <+1017>:	fldl   -0x40(%ebp)
   0x000003fc <+1020>:	xor    %edx,%edx
   0x000003fe <+1022>:	mov    %edx,-0x40(%ebp)
   0x00000401 <+1025>:	fldl   -0x30(%ebp)
   0x00000404 <+1028>:	fxch   %st(1)
   0x00000406 <+1030>:	fcomi  %st(1),%st
   0x00000408 <+1032>:	ja     0x41e <mp_format_float+1054>
   0x0000040a <+1034>:	incl   -0x40(%ebp)
   0x0000040d <+1037>:	cmpl   $0x9,-0x40(%ebp)
   0x00000411 <+1041>:	fsubr  %st,%st(1)
   0x00000413 <+1043>:	fxch   %st(1)
   0x00000415 <+1045>:	fstpl  -0x30(%ebp)
   0x00000418 <+1048>:	jne    0x401 <mp_format_float+1025>
   0x0000041a <+1050>:	fstp   %st(0)
   0x0000041c <+1052>:	jmp    0x422 <mp_format_float+1058>
   0x0000041e <+1054>:	fstp   %st(0)
   0x00000420 <+1056>:	fstp   %st(0)
   0x00000422 <+1058>:	test   %ecx,%ecx
   0x00000424 <+1060>:	je     0x44a <mp_format_float+1098>
   0x00000426 <+1062>:	mov    -0x40(%ebp),%dl
   0x00000429 <+1065>:	lea    0x30(%edx),%ebx
   0x0000042c <+1068>:	mov    -0x34(%ebp),%edx
   0x0000042f <+1071>:	mov    %bl,(%esi)
   0x00000431 <+1073>:	sub    %eax,%edx
   0x00000433 <+1075>:	mov    %edx,%ebx
   0x00000435 <+1077>:	add    -0x44(%ebp),%ebx
   0x00000438 <+1080>:	jne    0x440 <mp_format_float+1088>
   0x0000043a <+1082>:	cmpl   $0x0,-0x38(%ebp)
   0x0000043e <+1086>:	jg     0x443 <mp_format_float+1091>
   0x00000440 <+1088>:	inc    %esi
   0x00000441 <+1089>:	jmp    0x44a <mp_format_float+1098>
   0x00000443 <+1091>:	movb   $0x2e,0x1(%esi)
   0x00000447 <+1095>:	add    $0x2,%esi
   0x0000044a <+1098>:	dec    %ecx
   0x0000044b <+1099>:	cmpl   $0x0,-0x44(%ebp)
   0x0000044f <+1103>:	jg     0x45d <mp_format_float+1117>
   0x00000451 <+1105>:	fldl   -0x30(%ebp)
   0x00000454 <+1108>:	fmuls  0x48
   0x0000045a <+1114>:	fstpl  -0x30(%ebp)
   0x0000045d <+1117>:	decl   -0x44(%ebp)
   0x00000460 <+1120>:	jmp    0x3c3 <mp_format_float+963>
   0x00000465 <+1125>:	cmpl   $0x4,-0x40(%ebp)
   0x00000469 <+1129>:	lea    -0x1(%esi),%eax
   0x0000046c <+1132>:	jle    0x50e <mp_format_float+1294>
   0x00000472 <+1138>:	mov    (%eax),%dl
   0x00000474 <+1140>:	mov    %eax,%ebx
   0x00000476 <+1142>:	cmp    $0x2e,%dl
   0x00000479 <+1145>:	jne    0x47e <mp_format_float+1150>
   0x0000047b <+1147>:	dec    %eax
   0x0000047c <+1148>:	jmp    0x472 <mp_format_float+1138>
   0x0000047e <+1150>:	lea    -0x30(%edx),%ecx
   0x00000481 <+1153>:	cmp    $0x9,%cl
   0x00000484 <+1156>:	jbe    0x489 <mp_format_float+1161>
   0x00000486 <+1158>:	inc    %ebx
   0x00000487 <+1159>:	jmp    0x49b <mp_format_float+1179>
   0x00000489 <+1161>:	cmp    $0x39,%dl
   0x0000048c <+1164>:	je     0x493 <mp_format_float+1171>
   0x0000048e <+1166>:	inc    %edx
   0x0000048f <+1167>:	mov    %dl,(%eax)
   0x00000491 <+1169>:	jmp    0x49b <mp_format_float+1179>
   0x00000493 <+1171>:	movb   $0x30,(%eax)
   0x00000496 <+1174>:	cmp    0x10(%ebp),%eax
   0x00000499 <+1177>:	jne    0x47b <mp_format_float+1147>
   0x0000049b <+1179>:	cmpb   $0x30,(%ebx)
   0x0000049e <+1182>:	jne    0x50e <mp_format_float+1294>
   0x000004a0 <+1184>:	cmpb   $0x2e,0x1(%ebx)
   0x000004a4 <+1188>:	jne    0x4c6 <mp_format_float+1222>
   0x000004a6 <+1190>:	cmpb   $0x66,-0x1d(%ebp)
   0x000004aa <+1194>:	je     0x4c6 <mp_format_float+1222>
   0x000004ac <+1196>:	mov    %edi,%eax
   0x000004ae <+1198>:	movw   $0x302e,(%ebx)
   0x000004b3 <+1203>:	cmp    $0x2d,%al
   0x000004b5 <+1205>:	jne    0x4c1 <mp_format_float+1217>
   0x000004b7 <+1207>:	decl   -0x28(%ebp)
   0x000004ba <+1210>:	mov    $0x2b,%al
   0x000004bc <+1212>:	cmove  %eax,%edi
   0x000004bf <+1215>:	jmp    0x4d4 <mp_format_float+1236>
   0x000004c1 <+1217>:	incl   -0x28(%ebp)
   0x000004c4 <+1220>:	jmp    0x4d4 <mp_format_float+1236>
   0x000004c6 <+1222>:	lea    0x1(%esi),%eax
   0x000004c9 <+1225>:	mov    %eax,%edx
   0x000004cb <+1227>:	sub    0x10(%ebp),%edx
   0x000004ce <+1230>:	cmp    0x14(%ebp),%edx
   0x000004d1 <+1233>:	cmovb  %eax,%esi
   0x000004d4 <+1236>:	mov    %esi,%eax
   0x000004d6 <+1238>:	xor    %edx,%edx
   0x000004d8 <+1240>:	sub    %ebx,%eax
   0x000004da <+1242>:	cmp    %ebx,%esi
   0x000004dc <+1244>:	cmovb  %edx,%eax
   0x000004df <+1247>:	mov    %eax,0x8(%esp)
   0x000004e3 <+1251>:	mov    %ebx,%eax
   0x000004e5 <+1253>:	sub    %esi,%eax
   0x000004e7 <+1255>:	cmp    %ebx,%esi
   0x000004e9 <+1257>:	cmovb  %edx,%eax
   0x000004ec <+1260>:	mov    $0x1,%edx
   0x000004f1 <+1265>:	add    %esi,%eax
   0x000004f3 <+1267>:	mov    %eax,0x4(%esp)
   0x000004f7 <+1271>:	lea    0x1(%ebx),%eax
   0x000004fa <+1274>:	sub    %esi,%eax
   0x000004fc <+1276>:	cmp    %ebx,%esi
   0x000004fe <+1278>:	cmovb  %edx,%eax
   0x00000501 <+1281>:	add    %esi,%eax
   0x00000503 <+1283>:	mov    %eax,(%esp)
   0x00000506 <+1286>:	call   0x50b <mp_format_float+1291>
   0x0000050b <+1291>:	movb   $0x31,(%ebx)
   0x0000050e <+1294>:	lea    0x1(%esi),%eax
   0x00000511 <+1297>:	sub    0x10(%ebp),%eax
   0x00000514 <+1300>:	cmp    %eax,0x14(%ebp)
   0x00000517 <+1303>:	jae    0x536 <mp_format_float+1334>
   0x00000519 <+1305>:	movl   $0x193,0x8(%esp)
   0x00000521 <+1313>:	movl   $0x0,0x4(%esp)
   0x00000529 <+1321>:	movl   $0x17,(%esp)
   0x00000530 <+1328>:	call   *0x0
   0x00000536 <+1334>:	cmpl   $0x0,-0x38(%ebp)
   0x0000053a <+1338>:	jle    0x551 <mp_format_float+1361>
   0x0000053c <+1340>:	cmpb   $0x0,-0x1f(%ebp)
   0x00000540 <+1344>:	je     0x551 <mp_format_float+1361>
   0x00000542 <+1346>:	mov    -0x1(%esi),%al
   0x00000545 <+1349>:	cmp    $0x30,%al
   0x00000547 <+1351>:	jne    0x54c <mp_format_float+1356>
   0x00000549 <+1353>:	dec    %esi
   0x0000054a <+1354>:	jmp    0x542 <mp_format_float+1346>
   0x0000054c <+1356>:	cmp    $0x2e,%al
   0x0000054e <+1358>:	jne    0x551 <mp_format_float+1361>
   0x00000550 <+1360>:	dec    %esi
   0x00000551 <+1361>:	mov    %edi,%eax
   0x00000553 <+1363>:	test   %al,%al
   0x00000555 <+1365>:	je     0x59f <mp_format_float+1439>
   0x00000557 <+1367>:	mov    -0x1e(%ebp),%al
   0x0000055a <+1370>:	lea    0x2(%esi),%ecx
   0x0000055d <+1373>:	or     $0x45,%eax
   0x00000560 <+1376>:	cmpl   $0x63,-0x28(%ebp)
   0x00000564 <+1380>:	mov    %al,(%esi)
   0x00000566 <+1382>:	mov    %edi,%eax
   0x00000568 <+1384>:	mov    %al,0x1(%esi)
   0x0000056b <+1387>:	jle    0x581 <mp_format_float+1409>
   0x0000056d <+1389>:	mov    -0x28(%ebp),%eax
   0x00000570 <+1392>:	mov    $0x64,%ebx
   0x00000575 <+1397>:	lea    0x3(%esi),%ecx
   0x00000578 <+1400>:	cltd   
   0x00000579 <+1401>:	idiv   %ebx
   0x0000057b <+1403>:	add    $0x30,%eax
   0x0000057e <+1406>:	mov    %al,0x2(%esi)
   0x00000581 <+1409>:	mov    -0x28(%ebp),%eax
   0x00000584 <+1412>:	mov    $0xa,%ebx
   0x00000589 <+1417>:	lea    0x2(%ecx),%esi
   0x0000058c <+1420>:	cltd   
   0x0000058d <+1421>:	idiv   %ebx
   0x0000058f <+1423>:	mov    %edx,%edi
   0x00000591 <+1425>:	cltd   
   0x00000592 <+1426>:	idiv   %ebx
   0x00000594 <+1428>:	lea    0x30(%edi),%eax
   0x00000597 <+1431>:	mov    %al,0x1(%ecx)
   0x0000059a <+1434>:	add    $0x30,%edx
   0x0000059d <+1437>:	mov    %dl,(%ecx)
   0x0000059f <+1439>:	lea    0x1(%esi),%eax
   0x000005a2 <+1442>:	movb   $0x0,(%esi)
   0x000005a5 <+1445>:	sub    0x10(%ebp),%eax
   0x000005a8 <+1448>:	cmp    %eax,0x14(%ebp)
   0x000005ab <+1451>:	jae    0x5ca <mp_format_float+1482>
   0x000005ad <+1453>:	movl   $0x1ab,0x8(%esp)
   0x000005b5 <+1461>:	movl   $0x0,0x4(%esp)
   0x000005bd <+1469>:	movl   $0x17,(%esp)
   0x000005c4 <+1476>:	call   *0x0
   0x000005ca <+1482>:	mov    %esi,%eax
   0x000005cc <+1484>:	sub    0x10(%ebp),%eax
   0x000005cf <+1487>:	add    $0x6c,%esp
   0x000005d2 <+1490>:	pop    %ebx
   0x000005d3 <+1491>:	pop    %esi
   0x000005d4 <+1492>:	pop    %edi
   0x000005d5 <+1493>:	pop    %ebp
   0x000005d6 <+1494>:	ret    
End of assembler dump.

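They look quite similar. Note that each `call` in the dump appears to target the instruction immediately following it (e.g. `call 0x1e8 <mp_format_float+488>`), which is what an unrelocated call looks like when disassembling an unlinked object file, so these are most likely real calls to pow()/powl() rather than gcc optimising the pow() calls as inline assembly. Either way, I'm guessing/hoping this doesn't make it any less efficient.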

@github-actions
Copy link

Code size report:

   bare-arm:    +0 +0.000% 
minimal x86:    +0 +0.000% 

@dlech dlech force-pushed the mingw32-float-format branch from 5232f70 to fcb3d6d Compare December 19, 2022 00:42
@github-actions
Copy link

Code size report:

   bare-arm:    +0 +0.000% 
minimal x86:    +0 +0.000% 

@codecov-commenter
Copy link

codecov-commenter commented Dec 19, 2022

Codecov Report

All modified and coverable lines are covered by tests ✅

Comparison is base (ac8e7f7) 98.36% compared to head (fcb3d6d) 98.49%.

❗ Current head fcb3d6d differs from pull request most recent head 23342ef. Consider uploading reports for the commit 23342ef to get more accurate results

Additional details and impacted files
@@            Coverage Diff             @@
##           master   #10267      +/-   ##
==========================================
+ Coverage   98.36%   98.49%   +0.13%     
==========================================
  Files         159      155       -4     
  Lines       21093    20528     -565     
==========================================
- Hits        20748    20220     -528     
+ Misses        345      308      -37     

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

@dpgeorge
Copy link
Member

I'm not sure this is the right solution. What's probably happening is that it's using 80-bit float precision internally and that's making a slight difference to rounding in the uPy float algorithm. Or the compiler (with optimisation) ignores the IEEE standard; see -funsafe-math-optimizations and -ffast-math.

Maybe there's a compiler option that can be used to fix this? Maybe we can compile floatformat.c with -O0 unconditionally on mingw32?

Otherwise we can simply skip this test on mingw32 (see RUN_TESTS_SKIP in ports/windows/Makefile).

@dlech
Copy link
Contributor Author

dlech commented Dec 19, 2022

Maybe there's a compiler option that can be used to fix this? Maybe we can compile floatformat.c with -O0 unconditionally on mingw32?

We can actually just not optimize the one function, and I have confirmed that it fixes the problem too. But since the problem is just with the pow() functions, it seemed like a bit of overkill for such a large function compared to using powl(), which just changes a few CPU instructions.

diff --git a/py/formatfloat.c b/py/formatfloat.c
index fc1b2fe7f..1daee1360 100644
--- a/py/formatfloat.c
+++ b/py/formatfloat.c
@@ -98,6 +98,11 @@ static inline int fp_expval(FPTYPE x) {
     return (int)((fb.i >> MP_FLOAT_FRAC_BITS) & (~(0xFFFFFFFF << MP_FLOAT_EXP_BITS))) - MP_FLOAT_EXP_OFFSET;
 }
 
+#if MICROPY_FLOAT_IMPL == MICROPY_FLOAT_IMPL_DOUBLE && defined(__GNUC__) && defined(__MINGW32__) && defined(__i386__)
+// When optimizations are enabled using mingw's gcc, it breaks pow(double, double)
+// and gives wrong results.
+__attribute__((optimize("O0")))
+#endif
 int mp_format_float(FPTYPE f, char *buf, size_t buf_size, char fmt, int prec, char sign) {
 
     char *s = buf;

@dlech
Copy link
Contributor Author

dlech commented Dec 19, 2022

I suppose another option could be to create a wrapper around pow() and compile that function "O0" so that it actually calls pow() instead of inline assembly. EDIT: nope, doesn't seem to work.

@dlech
Copy link
Contributor Author

dlech commented Dec 19, 2022

Maybe there's a compiler option that can be used to fix this?

I think I found the answer: https://lemire.me/blog/2020/06/26/gcc-not-nearest/

@dlech dlech force-pushed the mingw32-float-format branch from fcb3d6d to 847bc69 Compare December 19, 2022 04:10
@dlech dlech force-pushed the mingw32-float-format branch 3 times, most recently from 38e455a to 7d96388 Compare December 19, 2022 04:59
@@ -84,6 +84,11 @@ ifneq ($(FROZEN_MANIFEST),)
CFLAGS += -DMICROPY_QSTR_EXTRA_POOL=mp_qstr_frozen_const_pool -DMICROPY_MODULE_FROZEN_MPY=1 -DMPZ_DIG_SIZE=16
endif

ifeq ($(shell $(CC) -dumpmachine),i686-w64-mingw32)
# https://lemire.me/blog/2020/06/26/gcc-not-nearest
CFLAGS += -msse -mfpmath=sse -march=pentium4
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs a more substantial comment, eg "force gcc to use IEEE correct rounding when optimising float constants at compile time"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated

@dlech dlech force-pushed the mingw32-float-format branch 2 times, most recently from 611b631 to e58d368 Compare December 19, 2022 05:31
@stinos
Copy link
Contributor

stinos commented Dec 19, 2022

FYI for the msvc build we also use -fp:precise (the default) exactly for this reason.

When compiler optimizations are enabled on the mingw version of gcc, we are
getting failing tests because of rounding issues, for example:

    print(float("1e24"))

would print

    9.999999999999999e+23

instead of

    1e+24

It turns out special compiler options are needed to get GCC to use the SSE
instruction set instead of the 387 coprocessor (which uses 80-bit precision
internally).

Signed-off-by: David Lechner <david@pybricks.com>
@dpgeorge dpgeorge force-pushed the mingw32-float-format branch from e58d368 to 23342ef Compare February 5, 2024 03:06
@dpgeorge dpgeorge merged commit 23342ef into micropython:master Feb 5, 2024
@dlech dlech deleted the mingw32-float-format branch February 5, 2024 03:35
tannewt added a commit to tannewt/circuitpython that referenced this pull request May 3, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
py-core Relates to py/ directory in source
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants