diff --git a/doc/release/1.14.0-notes.rst b/doc/release/1.14.0-notes.rst
index 0aeeadd40fae..c96e7444f61a 100644
--- a/doc/release/1.14.0-notes.rst
+++ b/doc/release/1.14.0-notes.rst
@@ -16,6 +16,8 @@ New functions
 
 * ``parametrize``: decorator added to numpy.testing
 * ``chebinterpolate``: Interpolate function at Chebyshev points.
+* ``format_float_positional`` and ``format_float_scientific`` : format
+  floating-point scalars unambiguously with control of rounding and padding.
 
 
 Deprecations
@@ -306,6 +308,28 @@ now supported for these arrays:
  * `arr.resize(...)`
  * `pickle.dumps(arr)`
 
+Float printing now uses "dragon4" algorithm for shortest decimal representation
+-------------------------------------------------------------------------------
+All numpy floating-point types (16, 32, 64 and 128 bit) can now be printed to
+give shortest decimal representation of the number, which uniquely identifies
+the value from others of the same type.
+
+New functions ``np.format_float_scientific`` and ``np.format_float_positional``
+are provided to generate these decimal representations, with control over
+rounding, trimming and padding.
+
+The ``str`` and ``repr`` of floating-point scalars now shows the shortest
+unique decimal representation. This means there will be a different number of
+digits compared to numpy 1.13, for instance float64 and float128 will be longer
+and float16 will be shorter.
+
+For arrays of floating-point type, a new formatting option ``floatmode`` has
+been added to ``np.set_printoptions`` and ``np.array2string``, which gives
+control over uniqueness and rounding of printed elements in arrays. The new
+default is ``floatmode='maxprec'`` with ``precision=8``, which will print at most
+8 fractional digits, or fewer if an element can be uniquely represented with
+fewer.
+
 
 Changes
 =======
diff --git a/doc/source/reference/routines.io.rst b/doc/source/reference/routines.io.rst
index 6747f60bd15d..00cafebc2650 100644
--- a/doc/source/reference/routines.io.rst
+++ b/doc/source/reference/routines.io.rst
@@ -45,6 +45,8 @@ String formatting
    array2string
    array_repr
    array_str
+   format_float_positional
+   format_float_scientific
 
 Memory mapping files
 --------------------
diff --git a/numpy/core/arrayprint.py b/numpy/core/arrayprint.py
index 7ce6e0795605..8253713be8de 100644
--- a/numpy/core/arrayprint.py
+++ b/numpy/core/arrayprint.py
@@ -6,7 +6,8 @@
 from __future__ import division, absolute_import, print_function
 
 __all__ = ["array2string", "array_str", "array_repr", "set_string_function",
-           "set_printoptions", "get_printoptions"]
+           "set_printoptions", "get_printoptions", "format_float_positional",
+           "format_float_scientific"]
 __docformat__ = 'restructuredtext'
 
 #
@@ -40,8 +41,8 @@
 from . import numerictypes as _nt
 from .umath import absolute, not_equal, isnan, isinf
 from . import multiarray
-from .multiarray import (array, format_longfloat, datetime_as_string,
-                         datetime_data, dtype, ndarray)
+from .multiarray import (array, dragon4_positional, dragon4_scientific,
+                         datetime_as_string, datetime_data, dtype, ndarray)
 from .fromnumeric import ravel, any
 from .numeric import concatenate, asarray, errstate
 from .numerictypes import (longlong, intc, int_, float_, complex_, bool_,
@@ -58,6 +59,7 @@
 _format_options = {
     'edgeitems': 3,  # repr N leading and trailing items of each dimension
     'threshold': 1000,  # total items > triggers array summarization
+    'floatmode': 'maxprec',
     'precision': 8,  # precision of floating point representations
     'suppress': False,  # suppress printing small floating values in exp format
     'linewidth': 75,
@@ -68,7 +70,7 @@
 
 def _make_options_dict(precision=None, threshold=None, edgeitems=None,
                        linewidth=None, suppress=None, nanstr=None, infstr=None,
-                       sign=None, formatter=None):
+                       sign=None, formatter=None, floatmode=None):
     """ make a dictionary out of the non-None arguments, plus sanity checks """
 
     options = {k: v for k, v in locals().items() if v is not None}
@@ -79,12 +81,17 @@ def _make_options_dict(precision=None, threshold=None, edgeitems=None,
     if sign not in [None, '-', '+', ' ', 'legacy']:
         raise ValueError("sign option must be one of "
                          "' ', '+', '-', or 'legacy'")
+    
+    modes = ['fixed', 'unique', 'maxprec', 'maxprec_equal']
+    if floatmode not in modes + [None]:
+        raise ValueError("floatmode option must be one of " +
+                         ", ".join('"{}"'.format(m) for m in modes))
 
     return options
 
 def set_printoptions(precision=None, threshold=None, edgeitems=None,
                      linewidth=None, suppress=None, nanstr=None, infstr=None,
-                     formatter=None, sign=None):
+                     formatter=None, sign=None, floatmode=None):
     """
     Set printing options.
 
@@ -146,6 +153,23 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
             - 'float_kind' : sets 'float' and 'longfloat'
             - 'complex_kind' : sets 'complexfloat' and 'longcomplexfloat'
             - 'str_kind' : sets 'str' and 'numpystr'
+    floatmode : str, optional
+        Controls the interpretation of the `precision` option for
+        floating-point types. Can take the following values:
+            - 'fixed' : Always print exactly `precision` fractional digits,
+                    even if this would print more or fewer digits than
+                    necessary to specify the value uniquely.
+            - 'unique : Print the minimum number of fractional digits necessary
+                    to represent each value uniquely. Different elements may
+                    have a different number of digits. The value of the
+                    `precision` option is ignored.
+            - 'maxprec' : Print at most `precision` fractional digits, but if
+                    an element can be uniquely represented with fewer digits
+                    only print it with that many.
+            - 'maxprec_equal' : Print at most `precision` fractional digits,
+                    but if every element in the array can be uniquely
+                    represented with an equal number of fewer digits, use that
+                    many digits for all elements.
 
     See Also
     --------
@@ -196,7 +220,8 @@ def set_printoptions(precision=None, threshold=None, edgeitems=None,
     ... suppress=False, threshold=1000, formatter=None)
     """
     opt = _make_options_dict(precision, threshold, edgeitems, linewidth,
-                             suppress, nanstr, infstr, sign, formatter)
+                             suppress, nanstr, infstr, sign, formatter,
+                             floatmode)
     # formatter is always reset
     opt['formatter'] = formatter
     _format_options.update(opt)
@@ -259,15 +284,16 @@ def repr_format(x):
     return repr(x)
 
 def _get_formatdict(data, **opt):
-    prec, supp, sign = opt['precision'], opt['suppress'], opt['sign']
+    prec, fmode = opt['precision'], opt['floatmode']
+    supp, sign = opt['suppress'], opt['sign']
 
     # wrapped in lambdas to avoid taking a code path with the wrong type of data
     formatdict = {'bool': lambda: BoolFormat(data),
                   'int': lambda: IntegerFormat(data),
-                  'float': lambda: FloatFormat(data, prec, supp, sign),
-                  'longfloat': lambda: LongFloatFormat(prec),
-                  'complexfloat': lambda: ComplexFormat(data, prec, supp, sign),
-                  'longcomplexfloat': lambda: LongComplexFormat(prec),
+                  'float': lambda: FloatingFormat(data, prec, fmode, 
+                                                  supp, sign),
+                  'complexfloat': lambda: ComplexFormat(data, prec, fmode,
+                                                        supp, sign),
                   'datetime': lambda: DatetimeFormat(data),
                   'timedelta': lambda: TimedeltaFormat(data),
                   'object': lambda: _object_format,
@@ -289,7 +315,7 @@ def indirect(x):
             for key in ['int']:
                 formatdict[key] = indirect(formatter['int_kind'])
         if 'float_kind' in fkeys:
-            for key in ['float', 'longfloat']:
+            for key in ['half', 'float', 'longfloat']:
                 formatdict[key] = indirect(formatter['float_kind'])
         if 'complex_kind' in fkeys:
             for key in ['complexfloat', 'longcomplexfloat']:
@@ -321,15 +347,9 @@ def _get_format_function(data, **options):
         else:
             return formatdict['int']()
     elif issubclass(dtypeobj, _nt.floating):
-        if issubclass(dtypeobj, _nt.longfloat):
-            return formatdict['longfloat']()
-        else:
-            return formatdict['float']()
+        return formatdict['float']()
     elif issubclass(dtypeobj, _nt.complexfloating):
-        if issubclass(dtypeobj, _nt.clongfloat):
-            return formatdict['longcomplexfloat']()
-        else:
-            return formatdict['complexfloat']()
+        return formatdict['complexfloat']()
     elif issubclass(dtypeobj, (_nt.unicode_, _nt.string_)):
         return formatdict['numpystr']()
     elif issubclass(dtypeobj, _nt.datetime64):
@@ -396,7 +416,7 @@ def _array2string(a, options, separator=' ', prefix=""):
 def array2string(a, max_line_width=None, precision=None,
                  suppress_small=None, separator=' ', prefix="",
                  style=np._NoValue, formatter=None, threshold=None,
-                 edgeitems=None, sign=None):
+                 edgeitems=None, sign=None, floatmode=None):
     """
     Return a string representation of an array.
 
@@ -463,6 +483,23 @@ def array2string(a, max_line_width=None, precision=None,
         (whitespace character) in the sign position of positive values.  If
         '-', omit the sign character of positive values. If 'legacy', print a
         space for positive values except in 0d arrays.
+    floatmode : str, optional
+        Controls the interpretation of the `precision` option for
+        floating-point types. Can take the following values:
+            - 'fixed' : Always print exactly `precision` fractional digits,
+                    even if this would print more or fewer digits than
+                    necessary to specify the value uniquely.
+            - 'unique : Print the minimum number of fractional digits necessary
+                    to represent each value uniquely. Different elements may
+                    have a different number of digits.  The value of the
+                    `precision` option is ignored.
+            - 'maxprec' : Print at most `precision` fractional digits, but if
+                    an element can be uniquely represented with fewer digits
+                    only print it with that many.
+            - 'maxprec_equal' : Print at most `precision` fractional digits,
+                    but if every element in the array can be uniquely
+                    represented with an equal number of fewer digits, use that
+                    many digits for all elements.
 
     Returns
     -------
@@ -510,7 +547,7 @@ def array2string(a, max_line_width=None, precision=None,
 
     overrides = _make_options_dict(precision, threshold, edgeitems,
                                    max_line_width, suppress_small, None, None,
-                                   sign, formatter)
+                                   sign, formatter, floatmode)
     options = _format_options.copy()
     options.update(overrides)
 
@@ -597,8 +634,8 @@ def _formatArray(a, format_function, rank, max_line_len,
                           summary_insert).rstrip()+']\n'
     return s
 
-class FloatFormat(object):
-    def __init__(self, data, precision, suppress_small, sign=False):
+class FloatingFormat(object):
+    def __init__(self, data, precision, floatmode, suppress_small, sign=False):
         # for backcompatibility, accept bools
         if isinstance(sign, bool):
             sign = '+' if sign else '-'
@@ -607,8 +644,15 @@ def __init__(self, data, precision, suppress_small, sign=False):
         if sign == 'legacy':
             self._legacy = True
             sign = '-' if data.shape == () else ' '
-
-        self.precision = precision
+        
+        self.floatmode = floatmode
+        if floatmode == 'unique':
+            self.precision = -1
+        else:
+            if precision < 0:
+                raise ValueError(
+                    "precision must be >= 0 in {} mode".format(floatmode))
+            self.precision = precision
         self.suppress_small = suppress_small
         self.sign = sign
         self.exp_format = False
@@ -637,74 +681,230 @@ def fillFormat(self, data):
                                            or max_val/min_val > 1000.):
                     self.exp_format = True
 
-        if self.exp_format:
-            self.large_exponent = 0 < min_val < 1e-99 or max_val >= 1e100
-
-            signpos = self.sign != '-' or any(non_zero < 0)
-            # for back-compatibility with np 1.13, use two spaces
+        if len(non_zero) == 0:
+            self.pad_left = 0
+            self.pad_right = 0
+            self.trim = '.'
+            self.exp_size = -1
+            self.unique = True
+        elif self.exp_format:
+            # first pass printing to determine sizes
+            trim, unique = '.', True
+            if self.floatmode == 'fixed' or self._legacy:
+                trim, unique = 'k', False
+            strs = (dragon4_scientific(x, precision=self.precision,
+                               unique=unique, trim=trim, sign=self.sign == '+')
+                    for x in non_zero)
+            frac_strs, _, exp_strs = zip(*(s.partition('e') for s in strs))
+            int_part, frac_part = zip(*(s.split('.') for s in frac_strs))
+            self.exp_size = max(len(s) for s in exp_strs) - 1
+
+            self.trim = 'k'
+            self.precision = max(len(s) for s in frac_part)
+
+            # for back-compatibility with np 1.13, use two spaces and full prec
             if self._legacy:
-                signpos = 2
-            max_str_len = signpos + 6 + self.precision + self.large_exponent
+                self.pad_left = 2 + (not (all(non_zero > 0) and self.sign == ' '))
+            else:
+                # this should be only 1 or two. Can be calculated from sign.
+                self.pad_left = max(len(s) for s in int_part)
+            # pad_right is not used to print, but needed for nan length calculation
+            self.pad_right = self.exp_size + 2 + self.precision
 
-            conversion = '' if self.sign == '-' else self.sign
-            format = '%' + conversion + '%d.%de' % (max_str_len, self.precision)
+            self.unique = False
         else:
-            if len(non_zero) and self.precision > 0:
-                precision = self.precision
-                trim_zero = lambda s: precision - (len(s) - len(s.rstrip('0')))
-                fmt = '%%.%df' % (precision,)
-                precision = max(trim_zero(fmt % x) for x in abs_non_zero)
+            # first pass printing to determine sizes
+            trim, unique = '.', True
+            if self.floatmode == 'fixed':
+                trim, unique = 'k', False
+            strs = (dragon4_positional(x, precision=self.precision,
+                                       unique=unique, trim=trim,
+                                       sign=self.sign == '+')
+                    for x in non_zero)
+            int_part, frac_part = zip(*(s.split('.') for s in strs))
+            self.pad_left = max(len(s) for s in int_part)
+            self.pad_right = max(len(s) for s in frac_part)
+            self.exp_size = -1
+
+            if self.floatmode in ['fixed', 'maxprec_equal']:
+                self.precision = self.pad_right
+                self.unique = False
+                self.trim = 'k'
             else:
-                precision = 0
-
-            int_len = len(str(int(max_val)))
-            signpos = self.sign != '-' or (len(str(int(min_val_sgn))) > int_len)
-            max_str_len = signpos + int_len + 1 + precision
-
-            if any(special):
-                neginf = self.sign != '-' or any(data[hasinf] < 0)
-                nanlen = len(_format_options['nanstr'])
-                inflen = len(_format_options['infstr']) + neginf
-                max_str_len = max(max_str_len, nanlen, inflen)
-
-            conversion = '' if self.sign == '-' else self.sign
-            format = '%#' + conversion + '%d.%df' % (max_str_len, precision)
-
-        self.special_fmt = '%%%ds' % (max_str_len,)
-        self.format = format
-
-    def __call__(self, x, strip_zeros=True):
-        with errstate(invalid='ignore'):
-            if isnan(x):
-                nan_str = _format_options['nanstr']
-                if self.sign == '+':
-                    return self.special_fmt % ('+' + nan_str,)
-                else:
-                    return self.special_fmt % (nan_str,)
-            elif isinf(x):
-                inf_str = _format_options['infstr']
-                if x > 0:
-                    if self.sign == '+':
-                        return self.special_fmt % ('+' + inf_str,)
-                    else:
-                        return self.special_fmt % (inf_str,)
-                else:
-                    return self.special_fmt % ('-' + inf_str,)
-
-        s = self.format % x
-        if self.large_exponent:
-            # 3-digit exponent
-            expsign = s[-3]
-            if expsign == '+' or expsign == '-':
-                s = s[1:-2] + '0' + s[-2:]
-        elif self.exp_format:
-            # 2-digit exponent
-            if s[-3] == '0':
-                s = ' ' + s[:-3] + s[-2:]
-        elif strip_zeros:
-            z = s.rstrip('0')
-            s = z + ' '*(len(s)-len(z))
-        return s
+                self.unique = True
+                self.trim = '.'
+
+        # account for sign = ' ' by adding one to pad_left
+        if len(non_zero) > 0 and all(non_zero > 0) and self.sign == ' ':
+            self.pad_left += 1
+
+        if any(special):
+            neginf = self.sign != '-' or any(data[hasinf] < 0)
+            nanlen = len(_format_options['nanstr'])
+            inflen = len(_format_options['infstr']) + neginf
+            offset = self.pad_right + 1  # +1 for decimal pt
+            self.pad_left = max(self.pad_left, nanlen - offset, inflen - offset)
+
+    def __call__(self, x):
+        if not np.isfinite(x):
+            with errstate(invalid='ignore'):
+                if np.isnan(x):
+                    sign = '+' if self.sign == '+' else ''
+                    ret = sign + _format_options['nanstr']
+                else:  # isinf
+                    sign = '-' if x < 0 else '+' if self.sign == '+' else ''
+                    ret = sign + _format_options['infstr']
+                return ' '*(self.pad_left + self.pad_right + 1 - len(ret)) + ret
+
+        if self.exp_format:
+            return dragon4_scientific(x, precision=self.precision,
+                                      unique=self.unique,
+                                      trim=self.trim, sign=self.sign == '+',
+                                      pad_left=self.pad_left,
+                                      exp_digits=self.exp_size)
+        else:
+            return dragon4_positional(x, precision=self.precision,
+                                      unique=self.unique,
+                                      trim=self.trim, sign=self.sign == '+',
+                                      pad_left=self.pad_left,
+                                      pad_right=self.pad_right)
+
+
+def format_float_scientific(x, precision=None, unique=True, trim='k',
+                            sign=False, pad_left=None, exp_digits=None):
+    """
+    Format a floating-point scalar as a string in fractional notation.
+
+    Provides control over rounding, trimming and padding. Uses and assumes
+    IEEE unbiased rounding. Uses the "Dragon4" algorithm.
+
+    Parameters
+    ----------
+    x : python float or numpy floating scalar
+        Value to format.
+    precision : non-negative integer, optional
+        Maximum number of fractional digits to print. May be ommited
+        if `unique` is `True`, but is required if unique is `False`.
+    unique : boolean, optional
+        If `False`, output exactly `precision` fractional digits and round the
+        remaining value. Digits are generated as if printing an
+        infinite-precision value and stopping after `precision` digits.
+        If `True`, use a digit-generation strategy which gives the shortest
+        representation which uniquely identifies the floating-point number from
+        other values of the same type, by judicious rounding. If `precision`
+        was omitted, print out the full unique representation, otherwise digit
+        generation is cut off after `precision` digits and the remaining value
+        is rounded.
+    trim : one of 'k', '.', '0', '-', optional
+        Controls post-processing trimming of trailing digits, as follows:
+            k : keep trailing zeros, keep decimal point (no trimming)
+            . : trim all trailing zeros, leave decimal point
+            0 : trim all but the zero before the decimal point. Insert the
+                zero if it is missing.
+            - : trim trailing zeros and any trailing decimal point
+    sign : boolean, optional
+        Whether to show the sign for positive values.
+    pad_left : non-negative integer, optional
+        Pad the left side of the string with whitespace until at least that
+        many characters are to the left of the decimal point.
+    exp_digits : non-negative integer, optional
+        Pad the exponent with zeros until it contains at least this many digits.
+        If omitted, the exponent will be at least 2 digits.
+
+    Returns
+    -------
+    rep : string
+        The string representation of the floating point value
+
+    See Also
+    --------
+    format_float_positional
+
+    Examples
+    --------
+    >>> np.format_float_scientific(np.float32(np.pi))
+    '3.1415927e+00'
+    >>> s = np.float32(1.23e24)
+    >>> np.format_float_scientific(s, unique=False, precision=15)
+    '1.230000071797338e+24'
+    >>> np.format_float_scientific(s, exp_digits=4)
+    '1.23e+0024'
+    """
+    precision = -1 if precision is None else precision
+    pad_left = -1 if pad_left is None else pad_left
+    exp_digits = -1 if exp_digits is None else exp_digits
+    return dragon4_scientific(x, precision=precision, unique=unique, trim=trim,
+                              sign=sign, pad_left=pad_left,
+                              exp_digits=exp_digits)
+
+def format_float_positional(x, precision=None, unique=True, trim='k',
+                            sign=False, pad_left=None, pad_right=None):
+    """
+    Format a floating-point scalar as a string in scientific notation.
+
+    Provides control over rounding, trimming and padding.  Uses and assumes
+    IEEE unbiased rounding. Uses the "Dragon4" algorithm.
+
+    Parameters
+    ----------
+    x : python float or numpy floating scalar
+        Value to format.
+    precision : non-negative integer, optional
+        Maximum number of fractional digits to print. May be ommited
+        if `unique` is `True`, but is required if unique is `False`.
+    unique : boolean, optional
+        If `False`, output exactly `precision` fractional digits and round the
+        remaining value. Digits are generated as if printing an
+        infinite-precision value and stopping after `precision` digits.
+        If `True`, use a digit-generation strategy which gives the shortest
+        representation which uniquely identifies the floating-point number from
+        other values of the same type, by judicious rounding. If `precision`
+        was omitted, print out the full unique representation, otherwise digit
+        generation is cut off after `precision` digits and the remaining value
+        is rounded.
+    trim : one of 'k', '.', '0', '-', optional
+        Controls post-processing trimming of trailing digits, as follows:
+            k : keep trailing zeros, keep decimal point (no trimming)
+            . : trim all trailing zeros, leave decimal point
+            0 : trim all but the zero before the decimal point. Insert the
+                zero if it is missing.
+            - : trim trailing zeros and any trailing decimal point
+    sign : boolean, optional
+        Whether to show the sign for positive values.
+    pad_left : non-negative integer, optional
+        Pad the left side of the string with whitespace until at least that
+        many characters are to the left of the decimal point.
+    pad_right : non-negative integer, optional
+        Pad the right side of the string with whitespace until at least that
+        many characters are to the right of the decimal point.
+
+    Returns
+    -------
+    rep : string
+        The string representation of the floating point value
+
+    See Also
+    --------
+    format_float_scientific
+
+    Examples
+    --------
+    >>> np.format_float_scientific(np.float32(np.pi))
+    '3.1415927'
+    >>> np.format_float_positional(np.float16(np.pi))
+    '3.14'
+    >>> np.format_float_positional(np.float16(0.3))
+    '0.3'
+    >>> np.format_float_positional(np.float16(0.3), unique=False, precision=10)
+    '0.3000488281'
+    """
+    precision = -1 if precision is None else precision
+    pad_left = -1 if pad_left is None else pad_left
+    pad_right = -1 if pad_right is None else pad_right
+    return dragon4_positional(x, precision=precision, unique=unique, trim=trim,
+                              sign=sign, pad_left=pad_left,
+                              pad_right=pad_right)
+
 
 class IntegerFormat(object):
     def __init__(self, data):
@@ -726,6 +926,7 @@ def __call__(self, x):
         else:
             return "%s" % x
 
+
 class BoolFormat(object):
     def __init__(self, data, **kwargs):
         # add an extra space so " True" and "False" have the same length and
@@ -736,75 +937,22 @@ def __call__(self, x):
         return self.truestr if x else "False"
 
 
-class LongFloatFormat(object):
-    # XXX Have to add something to determine the width to use a la FloatFormat
-    # Right now, things won't line up properly
-    def __init__(self, precision, sign=False):
+class ComplexFormat(object):
+    def __init__(self, x, precision, floatmode, suppress_small, sign=False):
         # for backcompatibility, accept bools
         if isinstance(sign, bool):
             sign = '+' if sign else '-'
 
-        self.precision = precision
-        self.sign = sign
-
-    def __call__(self, x):
-        if isnan(x):
-            nan_str = _format_options['nanstr']
-            if self.sign == '+':
-                return '+' + nan_str
-            else:
-                return ' ' + nan_str
-        elif isinf(x):
-            inf_str = _format_options['infstr']
-            if x > 0:
-                if self.sign == '+':
-                    return '+' + inf_str
-                else:
-                    return ' ' + inf_str
-            else:
-                return '-' + inf_str
-        elif x >= 0:
-            if self.sign == '+':
-                return '+' + format_longfloat(x, self.precision)
-            else:
-                return ' ' + format_longfloat(x, self.precision)
-        else:
-            return format_longfloat(x, self.precision)
-
-
-class LongComplexFormat(object):
-    def __init__(self, precision):
-        self.real_format = LongFloatFormat(precision)
-        self.imag_format = LongFloatFormat(precision, sign='+')
+        self.real_format = FloatingFormat(x.real, precision, floatmode, 
+                                          suppress_small, sign=sign)
+        self.imag_format = FloatingFormat(x.imag, precision, floatmode, 
+                                          suppress_small, sign='+')
 
     def __call__(self, x):
         r = self.real_format(x.real)
         i = self.imag_format(x.imag)
         return r + i + 'j'
 
-
-class ComplexFormat(object):
-    def __init__(self, x, precision, suppress_small, sign=False):
-        # for backcompatibility, accept bools
-        if isinstance(sign, bool):
-            sign = '+' if sign else '-'
-
-        self.real_format = FloatFormat(x.real, precision, suppress_small,
-                                       sign=sign)
-        self.imag_format = FloatFormat(x.imag, precision, suppress_small,
-                                       sign='+')
-
-    def __call__(self, x):
-        r = self.real_format(x.real, strip_zeros=False)
-        i = self.imag_format(x.imag, strip_zeros=False)
-        if not self.imag_format.exp_format:
-            z = i.rstrip('0')
-            i = z + 'j' + ' '*(len(i)-len(z))
-        else:
-            i = i + 'j'
-        return r + i
-
-
 class DatetimeFormat(object):
     def __init__(self, x, unit=None, timezone=None, casting='same_kind'):
         # Get the unit from the dtype
@@ -826,6 +974,7 @@ def __call__(self, x):
                                     timezone=self.timezone,
                                     casting=self.casting)
 
+
 class TimedeltaFormat(object):
     def __init__(self, data):
         nat_value = array(['NaT'], dtype=data.dtype)[0]
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index f56e705ab81e..366d14318ade 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -728,6 +728,7 @@ def get_mathlib_info(*args):
             join('src', 'multiarray', 'conversion_utils.h'),
             join('src', 'multiarray', 'ctors.h'),
             join('src', 'multiarray', 'descriptor.h'),
+            join('src', 'multiarray', 'dragon4.h'),
             join('src', 'multiarray', 'getset.h'),
             join('src', 'multiarray', 'hashdescr.h'),
             join('src', 'multiarray', 'iterators.h'),
@@ -793,6 +794,7 @@ def get_mathlib_info(*args):
             join('src', 'multiarray', 'datetime_busday.c'),
             join('src', 'multiarray', 'datetime_busdaycal.c'),
             join('src', 'multiarray', 'descriptor.c'),
+            join('src', 'multiarray', 'dragon4.c'),
             join('src', 'multiarray', 'dtype_transfer.c'),
             join('src', 'multiarray', 'einsum.c.src'),
             join('src', 'multiarray', 'flagsobject.c'),
diff --git a/numpy/core/src/multiarray/dragon4.c b/numpy/core/src/multiarray/dragon4.c
new file mode 100644
index 000000000000..ab0741932eb2
--- /dev/null
+++ b/numpy/core/src/multiarray/dragon4.c
@@ -0,0 +1,2737 @@
+/*
+ * Copyright (c) 2014 Ryan Juckett
+ * http://www.ryanjuckett.com/
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ *    distribution.
+ */
+
+/*
+ * This file contains a modified version of Ryan Juckett's Dragon4
+ * implementation, which has been ported from C++ to C and which has
+ * modifications specific to printing floats in numpy.
+ */
+
+#include "dragon4.h"
+#include <numpy/npy_common.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <assert.h>
+/* #define DEBUG_ASSERT(stmnt) assert(stmnt) */
+#define DEBUG_ASSERT(stmnt) {}
+
+/*
+ *  Get the log base 2 of a 32-bit unsigned integer.
+ *  http://graphics.stanford.edu/~seander/bithacks.html#IntegerLogLookup
+ */
+static npy_uint32
+LogBase2_32(npy_uint32 val)
+{
+    static const npy_uint8 logTable[256] =
+    {
+        0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
+    };
+
+    npy_uint32 temp;
+
+    temp = val >> 24;
+    if (temp) {
+        return 24 + logTable[temp];
+    }
+
+    temp = val >> 16;
+    if (temp) {
+        return 16 + logTable[temp];
+    }
+
+    temp = val >> 8;
+    if (temp) {
+        return 8 + logTable[temp];
+    }
+
+    return logTable[val];
+}
+
+static npy_uint32
+LogBase2_64(npy_uint64 val)
+{
+    npy_uint64 temp;
+
+    temp = val >> 32;
+    if (temp) {
+        return 32 + LogBase2_32((npy_uint32)temp);
+    }
+
+    return LogBase2_32((npy_uint32)val);
+}
+
+
+/*
+ * Maximum number of 32 bit blocks needed in high precision arithmetic to print
+ * out 128 bit IEEE floating point values. 1023 chosen to be large enough for
+ * 128 bit floats, and BigInt is exactly 4kb (nice for page/cache?)
+ */
+#define c_BigInt_MaxBlocks  1023
+
+/*
+ * This structure stores a high precision unsigned integer. It uses a buffer of
+ * 32 bit integer blocks along with a length. The lowest bits of the integer
+ * are stored at the start of the buffer and the length is set to the minimum
+ * value that contains the integer. Thus, there are never any zero blocks at
+ * the end of the buffer.
+ */
+typedef struct BigInt {
+    npy_uint32 length;
+    npy_uint32 blocks[c_BigInt_MaxBlocks];
+} BigInt;
+
+/* Copy integer */
+static void
+BigInt_Copy(BigInt *dst, const BigInt *src)
+{
+    npy_uint32 length = src->length;
+    npy_uint32 * dstp = dst->blocks;
+    const npy_uint32 *srcp;
+    for (srcp = src->blocks; srcp != src->blocks + length; ++dstp, ++srcp) {
+        *dstp = *srcp;
+    }
+    dst->length = length;
+}
+
+/* Basic type accessors */
+static void
+BigInt_Set_uint64(BigInt *i, npy_uint64 val)
+{
+    if (val > 0xFFFFFFFF) {
+        i->blocks[0] = val & 0xFFFFFFFF;
+        i->blocks[1] = (val >> 32) & 0xFFFFFFFF;
+        i->length = 2;
+    }
+    else if (val != 0) {
+        i->blocks[0] = val & 0xFFFFFFFF;
+        i->length = 1;
+    }
+    else {
+        i->length = 0;
+    }
+}
+
+static void
+BigInt_Set_uint32(BigInt *i, npy_uint32 val)
+{
+    if (val != 0) {
+        i->blocks[0] = val;
+        i->length = (val != 0);
+    }
+    else {
+        i->length = 0;
+    }
+}
+
+/*
+ * Returns 0 if (lhs = rhs), negative if (lhs < rhs), positive if (lhs > rhs)
+ */
+static npy_int32
+BigInt_Compare(const BigInt *lhs, const BigInt *rhs)
+{
+    int i;
+
+    /* A bigger length implies a bigger number. */
+    npy_int32 lengthDiff = lhs->length - rhs->length;
+    if (lengthDiff != 0) {
+        return lengthDiff;
+    }
+
+    /* Compare blocks one by one from high to low. */
+    for (i = lhs->length - 1; i >= 0; --i) {
+        if (lhs->blocks[i] == rhs->blocks[i]) {
+            continue;
+        }
+        else if (lhs->blocks[i] > rhs->blocks[i]) {
+            return 1;
+        }
+        else {
+            return -1;
+        }
+    }
+
+    /* no blocks differed */
+    return 0;
+}
+
+/* result = lhs + rhs */
+static void
+BigInt_Add(BigInt *result, const BigInt *lhs, const BigInt *rhs)
+{
+    /* determine which operand has the smaller length */
+    const BigInt *large, *small;
+    npy_uint64 carry = 0;
+    const npy_uint32 *largeCur, *smallCur, *largeEnd, *smallEnd;
+    npy_uint32 *resultCur;
+
+    if (lhs->length < rhs->length) {
+        small = lhs;
+        large = rhs;
+    }
+    else {
+        small = rhs;
+        large = lhs;
+    }
+
+    /* The output will be at least as long as the largest input */
+    result->length = large->length;
+
+    /* Add each block and add carry the overflow to the next block */
+    largeCur  = large->blocks;
+    largeEnd  = largeCur + large->length;
+    smallCur  = small->blocks;
+    smallEnd  = smallCur + small->length;
+    resultCur = result->blocks;
+    while (smallCur != smallEnd) {
+        npy_uint64 sum = carry + (npy_uint64)(*largeCur) +
+                                 (npy_uint64)(*smallCur);
+        carry = sum >> 32;
+        *resultCur = sum & 0xFFFFFFFF;
+        ++largeCur;
+        ++smallCur;
+        ++resultCur;
+    }
+
+    /* Add the carry to any blocks that only exist in the large operand */
+    while (largeCur != largeEnd) {
+        npy_uint64 sum = carry + (npy_uint64)(*largeCur);
+        carry = sum >> 32;
+        (*resultCur) = sum & 0xFFFFFFFF;
+        ++largeCur;
+        ++resultCur;
+    }
+
+    /* If there's still a carry, append a new block */
+    if (carry != 0) {
+        DEBUG_ASSERT(carry == 1);
+        DEBUG_ASSERT((npy_uint32)(resultCur - result->blocks) ==
+               large->length && (large->length < c_BigInt_MaxBlocks));
+        *resultCur = 1;
+        result->length = large->length + 1;
+    }
+    else {
+        result->length = large->length;
+    }
+}
+
+/*
+ * result = lhs * rhs
+ */
+static void
+BigInt_Multiply(BigInt *result, const BigInt *lhs, const BigInt *rhs)
+{
+    const BigInt *large;
+    const BigInt *small;
+    npy_uint32 maxResultLen;
+    npy_uint32 *cur, *end, *resultStart;
+    const npy_uint32 *smallCur;
+
+    DEBUG_ASSERT(result != lhs && result != rhs);
+
+    /* determine which operand has the smaller length */
+    if (lhs->length < rhs->length) {
+        small = lhs;
+        large = rhs;
+    }
+    else {
+        small = rhs;
+        large = lhs;
+    }
+
+    /* set the maximum possible result length */
+    maxResultLen = large->length + small->length;
+    DEBUG_ASSERT(maxResultLen <= c_BigInt_MaxBlocks);
+
+    /* clear the result data */
+    for (cur = result->blocks, end = cur + maxResultLen; cur != end; ++cur) {
+        *cur = 0;
+    }
+
+    /* perform standard long multiplication for each small block */
+    resultStart = result->blocks;
+    for (smallCur = small->blocks;
+            smallCur != small->blocks + small->length;
+            ++smallCur, ++resultStart) {
+        /*
+         * if non-zero, multiply against all the large blocks and add into the
+         * result
+         */
+        const npy_uint32 multiplier = *smallCur;
+        if (multiplier != 0) {
+            const npy_uint32 *largeCur = large->blocks;
+            npy_uint32 *resultCur = resultStart;
+            npy_uint64 carry = 0;
+            do {
+                npy_uint64 product = (*resultCur) +
+                                     (*largeCur)*(npy_uint64)multiplier + carry;
+                carry = product >> 32;
+                *resultCur = product & 0xFFFFFFFF;
+                ++largeCur;
+                ++resultCur;
+            } while(largeCur != large->blocks + large->length);
+
+            DEBUG_ASSERT(resultCur < result->blocks + maxResultLen);
+            *resultCur = (npy_uint32)(carry & 0xFFFFFFFF);
+        }
+    }
+
+    /* check if the terminating block has no set bits */
+    if (maxResultLen > 0 && result->blocks[maxResultLen - 1] == 0) {
+        result->length = maxResultLen-1;
+    }
+    else {
+        result->length = maxResultLen;
+    }
+}
+
+/* result = lhs * rhs */
+static void
+BigInt_Multiply_int(BigInt *result, const BigInt *lhs, npy_uint32 rhs)
+{
+    /* perform long multiplication */
+    npy_uint32 carry = 0;
+    npy_uint32 *resultCur = result->blocks;
+    const npy_uint32 *pLhsCur = lhs->blocks;
+    const npy_uint32 *pLhsEnd = lhs->blocks + lhs->length;
+    for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) {
+        npy_uint64 product = (npy_uint64)(*pLhsCur) * rhs + carry;
+        *resultCur = (npy_uint32)(product & 0xFFFFFFFF);
+        carry = product >> 32;
+    }
+
+    /* if there is a remaining carry, grow the array */
+    if (carry != 0) {
+        /* grow the array */
+        DEBUG_ASSERT(lhs->length + 1 <= c_BigInt_MaxBlocks);
+        *resultCur = (npy_uint32)carry;
+        result->length = lhs->length + 1;
+    }
+    else {
+        result->length = lhs->length;
+    }
+}
+
+/* result = in * 2 */
+static void
+BigInt_Multiply2(BigInt *result, const BigInt *in)
+{
+    /* shift all the blocks by one */
+    npy_uint32 carry = 0;
+
+    npy_uint32 *resultCur = result->blocks;
+    const npy_uint32 *pLhsCur = in->blocks;
+    const npy_uint32 *pLhsEnd = in->blocks + in->length;
+    for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++resultCur) {
+        npy_uint32 cur = *pLhsCur;
+        *resultCur = (cur << 1) | carry;
+        carry = cur >> 31;
+    }
+
+    if (carry != 0) {
+        /* grow the array */
+        DEBUG_ASSERT(in->length + 1 <= c_BigInt_MaxBlocks);
+        *resultCur = carry;
+        result->length = in->length + 1;
+    }
+    else {
+        result->length = in->length;
+    }
+}
+
+/* result = result * 2 */
+static void
+BigInt_Multiply2_inplace(BigInt *result)
+{
+    /* shift all the blocks by one */
+    npy_uint32 carry = 0;
+
+    npy_uint32 *cur = result->blocks;
+    npy_uint32 *end = result->blocks + result->length;
+    for ( ; cur != end; ++cur) {
+        npy_uint32 tmpcur = *cur;
+        *cur = (tmpcur << 1) | carry;
+        carry = tmpcur >> 31;
+    }
+
+    if (carry != 0) {
+        /* grow the array */
+        DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks);
+        *cur = carry;
+        ++result->length;
+    }
+}
+
+/* result = result * 10 */
+static void
+BigInt_Multiply10(BigInt *result)
+{
+    /* multiply all the blocks */
+    npy_uint64 carry = 0;
+
+    npy_uint32 *cur = result->blocks;
+    npy_uint32 *end = result->blocks + result->length;
+    for ( ; cur != end; ++cur) {
+        npy_uint64 product = (npy_uint64)(*cur) * 10ull + carry;
+        (*cur) = (npy_uint32)(product & 0xFFFFFFFF);
+        carry = product >> 32;
+    }
+
+    if (carry != 0) {
+        /* grow the array */
+        DEBUG_ASSERT(result->length + 1 <= c_BigInt_MaxBlocks);
+        *cur = (npy_uint32)carry;
+        ++result->length;
+    }
+}
+
+static npy_uint32 g_PowerOf10_U32[] =
+{
+    1,          /* 10 ^ 0 */
+    10,         /* 10 ^ 1 */
+    100,        /* 10 ^ 2 */
+    1000,       /* 10 ^ 3 */
+    10000,      /* 10 ^ 4 */
+    100000,     /* 10 ^ 5 */
+    1000000,    /* 10 ^ 6 */
+    10000000,   /* 10 ^ 7 */
+};
+
+/*
+ * Note: This has a lot of wasted space in the big integer structures of the
+ *       early table entries. It wouldn't be terribly hard to make the multiply
+ *       function work on integer pointers with an array length instead of
+ *       the BigInt struct which would allow us to store a minimal amount of
+ *       data here.
+ */
+static BigInt g_PowerOf10_Big[] =
+{
+    /* 10 ^ 8 */
+    { 1, { 100000000 } },
+    /* 10 ^ 16 */
+    { 2, { 0x6fc10000, 0x002386f2 } },
+    /* 10 ^ 32 */
+    { 4, { 0x00000000, 0x85acef81, 0x2d6d415b, 0x000004ee, } },
+    /* 10 ^ 64 */
+    { 7, { 0x00000000, 0x00000000, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed,
+           0xe93ff9f4, 0x00184f03, } },
+    /* 10 ^ 128 */
+    { 14, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2e953e01,
+            0x03df9909, 0x0f1538fd, 0x2374e42f, 0xd3cff5ec, 0xc404dc08,
+            0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x0000024e, } },
+    /* 10 ^ 256 */
+    { 27, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+            0x00000000, 0x00000000, 0x00000000, 0x982e7c01, 0xbed3875b,
+            0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70, 0xd595d80f,
+            0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e,
+            0xcc5573c0, 0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc,
+            0x5fdcefce, 0x000553f7, } },
+    /* 10 ^ 512 */
+    { 54, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+            0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+            0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+            0x00000000, 0xfc6cf801, 0x77f27267, 0x8f9546dc, 0x5d96976f,
+            0xb83a8a97, 0xc31e1ad9, 0x46c40513, 0x94e65747, 0xc88976c1,
+            0x4475b579, 0x28f8733b, 0xaa1da1bf, 0x703ed321, 0x1e25cfea,
+            0xb21a2f22, 0xbc51fb2e, 0x96e14f5d, 0xbfa3edac, 0x329c57ae,
+            0xe7fc7153, 0xc3fc0695, 0x85a91924, 0xf95f635e, 0xb2908ee0,
+            0x93abade4, 0x1366732a, 0x9449775c, 0x69be5b0e, 0x7343afac,
+            0xb099bc81, 0x45a71d46, 0xa2699748, 0x8cb07303, 0x8a0b1f13,
+            0x8cab8a97, 0xc1d238d9, 0x633415d4, 0x0000001c, } },
+    /* 10 ^ 1024 */
+    { 107, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x2919f001, 0xf55b2b72, 0x6e7c215b,
+             0x1ec29f86, 0x991c4e87, 0x15c51a88, 0x140ac535, 0x4c7d1e1a,
+             0xcc2cd819, 0x0ed1440e, 0x896634ee, 0x7de16cfb, 0x1e43f61f,
+             0x9fce837d, 0x231d2b9c, 0x233e55c7, 0x65dc60d7, 0xf451218b,
+             0x1c5cd134, 0xc9635986, 0x922bbb9f, 0xa7e89431, 0x9f9f2a07,
+             0x62be695a, 0x8e1042c4, 0x045b7a74, 0x1abe1de3, 0x8ad822a5,
+             0xba34c411, 0xd814b505, 0xbf3fdeb3, 0x8fc51a16, 0xb1b896bc,
+             0xf56deeec, 0x31fb6bfd, 0xb6f4654b, 0x101a3616, 0x6b7595fb,
+             0xdc1a47fe, 0x80d98089, 0x80bda5a5, 0x9a202882, 0x31eb0f66,
+             0xfc8f1f90, 0x976a3310, 0xe26a7b7e, 0xdf68368a, 0x3ce3a0b8,
+             0x8e4262ce, 0x75a351a2, 0x6cb0b6c9, 0x44597583, 0x31b5653f,
+             0xc356e38a, 0x35faaba6, 0x0190fba0, 0x9fc4ed52, 0x88bc491b,
+             0x1640114a, 0x005b8041, 0xf4f3235e, 0x1e8d4649, 0x36a8de06,
+             0x73c55349, 0xa7e6bd2a, 0xc1a6970c, 0x47187094, 0xd2db49ef,
+             0x926c3f5b, 0xae6209d4, 0x2d433949, 0x34f4a3c6, 0xd4305d94,
+             0xd9d61a05, 0x00000325, } },
+    /* 10 ^ 2048 */
+    { 213, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x1333e001,
+             0xe3096865, 0xb27d4d3f, 0x49e28dcf, 0xec2e4721, 0xee87e354,
+             0xb6067584, 0x368b8abb, 0xa5e5a191, 0x2ed56d55, 0xfd827773,
+             0xea50d142, 0x51b78db2, 0x98342c9e, 0xc850dabc, 0x866ed6f1,
+             0x19342c12, 0x92794987, 0xd2f869c2, 0x66912e4a, 0x71c7fd8f,
+             0x57a7842d, 0x235552eb, 0xfb7fedcc, 0xf3861ce0, 0x38209ce1,
+             0x9713b449, 0x34c10134, 0x8c6c54de, 0xa7a8289c, 0x2dbb6643,
+             0xe3cb64f3, 0x8074ff01, 0xe3892ee9, 0x10c17f94, 0xa8f16f92,
+             0xa8281ed6, 0x967abbb3, 0x5a151440, 0x9952fbed, 0x13b41e44,
+             0xafe609c3, 0xa2bca416, 0xf111821f, 0xfb1264b4, 0x91bac974,
+             0xd6c7d6ab, 0x8e48ff35, 0x4419bd43, 0xc4a65665, 0x685e5510,
+             0x33554c36, 0xab498697, 0x0dbd21fe, 0x3cfe491d, 0x982da466,
+             0xcbea4ca7, 0x9e110c7b, 0x79c56b8a, 0x5fc5a047, 0x84d80e2e,
+             0x1aa9f444, 0x730f203c, 0x6a57b1ab, 0xd752f7a6, 0x87a7dc62,
+             0x944545ff, 0x40660460, 0x77c1a42f, 0xc9ac375d, 0xe866d7ef,
+             0x744695f0, 0x81428c85, 0xa1fc6b96, 0xd7917c7b, 0x7bf03c19,
+             0x5b33eb41, 0x5715f791, 0x8f6cae5f, 0xdb0708fd, 0xb125ac8e,
+             0x785ce6b7, 0x56c6815b, 0x6f46eadb, 0x4eeebeee, 0x195355d8,
+             0xa244de3c, 0x9d7389c0, 0x53761abd, 0xcf99d019, 0xde9ec24b,
+             0x0d76ce39, 0x70beb181, 0x2e55ecee, 0xd5f86079, 0xf56d9d4b,
+             0xfb8886fb, 0x13ef5a83, 0x408f43c5, 0x3f3389a4, 0xfad37943,
+             0x58ccf45c, 0xf82df846, 0x415c7f3e, 0x2915e818, 0x8b3d5cf4,
+             0x6a445f27, 0xf8dbb57a, 0xca8f0070, 0x8ad803ec, 0xb2e87c34,
+             0x038f9245, 0xbedd8a6c, 0xc7c9dee0, 0x0eac7d56, 0x2ad3fa14,
+             0xe0de0840, 0xf775677c, 0xf1bd0ad5, 0x92be221e, 0x87fa1fb9,
+             0xce9d04a4, 0xd2c36fa9, 0x3f6f7024, 0xb028af62, 0x907855ee,
+             0xd83e49d6, 0x4efac5dc, 0xe7151aab, 0x77cd8c6b, 0x0a753b7d,
+             0x0af908b4, 0x8c983623, 0xe50f3027, 0x94222771, 0x1d08e2d6,
+             0xf7e928e6, 0xf2ee5ca6, 0x1b61b93c, 0x11eb962b, 0x9648b21c,
+             0xce2bcba1, 0x34f77154, 0x7bbebe30, 0xe526a319, 0x8ce329ac,
+             0xde4a74d2, 0xb5dc53d5, 0x0009e8b3, } },
+    /* 10 ^ 4096 */
+    { 426, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+             0x00000000, 0x00000000, 0x00000000, 0x2a67c001, 0xd4724e8d,
+             0x8efe7ae7, 0xf89a1e90, 0xef084117, 0x54e05154, 0x13b1bb51,
+             0x506be829, 0xfb29b172, 0xe599574e, 0xf0da6146, 0x806c0ed3,
+             0xb86ae5be, 0x45155e93, 0xc0591cc2, 0x7e1e7c34, 0x7c4823da,
+             0x1d1f4cce, 0x9b8ba1e8, 0xd6bfdf75, 0xe341be10, 0xc2dfae78,
+             0x016b67b2, 0x0f237f1a, 0x3dbeabcd, 0xaf6a2574, 0xcab3e6d7,
+             0x142e0e80, 0x61959127, 0x2c234811, 0x87009701, 0xcb4bf982,
+             0xf8169c84, 0x88052f8c, 0x68dde6d4, 0xbc131761, 0xff0b0905,
+             0x54ab9c41, 0x7613b224, 0x1a1c304e, 0x3bfe167b, 0x441c2d47,
+             0x4f6cea9c, 0x78f06181, 0xeb659fb8, 0x30c7ae41, 0x947e0d0e,
+             0xa1ebcad7, 0xd97d9556, 0x2130504d, 0x1a8309cb, 0xf2acd507,
+             0x3f8ec72a, 0xfd82373a, 0x95a842bc, 0x280f4d32, 0xf3618ac0,
+             0x811a4f04, 0x6dc3a5b4, 0xd3967a1b, 0x15b8c898, 0xdcfe388f,
+             0x454eb2a0, 0x8738b909, 0x10c4e996, 0x2bd9cc11, 0x3297cd0c,
+             0x655fec30, 0xae0725b1, 0xf4090ee8, 0x037d19ee, 0x398c6fed,
+             0x3b9af26b, 0xc994a450, 0xb5341743, 0x75a697b2, 0xac50b9c1,
+             0x3ccb5b92, 0xffe06205, 0xa8329761, 0xdfea5242, 0xeb83cadb,
+             0xe79dadf7, 0x3c20ee69, 0x1e0a6817, 0x7021b97a, 0x743074fa,
+             0x176ca776, 0x77fb8af6, 0xeca19beb, 0x92baf1de, 0xaf63b712,
+             0xde35c88b, 0xa4eb8f8c, 0xe137d5e9, 0x40b464a0, 0x87d1cde8,
+             0x42923bbd, 0xcd8f62ff, 0x2e2690f3, 0x095edc16, 0x59c89f1b,
+             0x1fa8fd5d, 0x5138753d, 0x390a2b29, 0x80152f18, 0x2dd8d925,
+             0xf984d83e, 0x7a872e74, 0xc19e1faf, 0xed4d542d, 0xecf9b5d0,
+             0x9462ea75, 0xc53c0adf, 0x0caea134, 0x37a2d439, 0xc8fa2e8a,
+             0x2181327e, 0x6e7bb827, 0x2d240820, 0x50be10e0, 0x5893d4b8,
+             0xab312bb9, 0x1f2b2322, 0x440b3f25, 0xbf627ede, 0x72dac789,
+             0xb608b895, 0x78787e2a, 0x86deb3f0, 0x6fee7aab, 0xbb9373f4,
+             0x27ecf57b, 0xf7d8b57e, 0xfca26a9f, 0x3d04e8d2, 0xc9df13cb,
+             0x3172826a, 0xcd9e8d7c, 0xa8fcd8e0, 0xb2c39497, 0x307641d9,
+             0x1cc939c1, 0x2608c4cf, 0xb6d1c7bf, 0x3d326a7e, 0xeeaf19e6,
+             0x8e13e25f, 0xee63302b, 0x2dfe6d97, 0x25971d58, 0xe41d3cc4,
+             0x0a80627c, 0xab8db59a, 0x9eea37c8, 0xe90afb77, 0x90ca19cf,
+             0x9ee3352c, 0x3613c850, 0xfe78d682, 0x788f6e50, 0x5b060904,
+             0xb71bd1a4, 0x3fecb534, 0xb32c450c, 0x20c33857, 0xa6e9cfda,
+             0x0239f4ce, 0x48497187, 0xa19adb95, 0xb492ed8a, 0x95aca6a8,
+             0x4dcd6cd9, 0xcf1b2350, 0xfbe8b12a, 0x1a67778c, 0x38eb3acc,
+             0xc32da383, 0xfb126ab1, 0xa03f40a8, 0xed5bf546, 0xe9ce4724,
+             0x4c4a74fd, 0x73a130d8, 0xd9960e2d, 0xa2ebd6c1, 0x94ab6feb,
+             0x6f233b7c, 0x49126080, 0x8e7b9a73, 0x4b8c9091, 0xd298f999,
+             0x35e836b5, 0xa96ddeff, 0x96119b31, 0x6b0dd9bc, 0xc6cc3f8d,
+             0x282566fb, 0x72b882e7, 0xd6769f3b, 0xa674343d, 0x00fc509b,
+             0xdcbf7789, 0xd6266a3f, 0xae9641fd, 0x4e89541b, 0x11953407,
+             0x53400d03, 0x8e0dd75a, 0xe5b53345, 0x108f19ad, 0x108b89bc,
+             0x41a4c954, 0xe03b2b63, 0x437b3d7f, 0x97aced8e, 0xcbd66670,
+             0x2c5508c2, 0x650ebc69, 0x5c4f2ef0, 0x904ff6bf, 0x9985a2df,
+             0x9faddd9e, 0x5ed8d239, 0x25585832, 0xe3e51cb9, 0x0ff4f1d4,
+             0x56c02d9a, 0x8c4ef804, 0xc1a08a13, 0x13fd01c8, 0xe6d27671,
+             0xa7c234f4, 0x9d0176cc, 0xd0d73df2, 0x4d8bfa89, 0x544f10cd,
+             0x2b17e0b2, 0xb70a5c7d, 0xfd86fe49, 0xdf373f41, 0x214495bb,
+             0x84e857fd, 0x00d313d5, 0x0496fcbe, 0xa4ba4744, 0xe8cac982,
+             0xaec29e6e, 0x87ec7038, 0x7000a519, 0xaeee333b, 0xff66e42c,
+             0x8afd6b25, 0x03b4f63b, 0xbd7991dc, 0x5ab8d9c7, 0x2ed4684e,
+             0x48741a6c, 0xaf06940d, 0x2fdc6349, 0xb03d7ecd, 0xe974996f,
+             0xac7867f9, 0x52ec8721, 0xbcdd9d4a, 0x8edd2d00, 0x3557de06,
+             0x41c759f8, 0x3956d4b9, 0xa75409f2, 0x123cd8a1, 0xb6100fab,
+             0x3e7b21e2, 0x2e8d623b, 0x92959da2, 0xbca35f77, 0x200c03a5,
+             0x35fcb457, 0x1bb6c6e4, 0xf74eb928, 0x3d5d0b54, 0x87cc1d21,
+             0x4964046f, 0x18ae4240, 0xd868b275, 0x8bd2b496, 0x1c5563f4,
+             0xc234d8f5, 0xf868e970, 0xf9151fff, 0xae7be4a2, 0x271133ee,
+             0xbb0fd922, 0x25254932, 0xa60a9fc0, 0x104bcd64, 0x30290145,
+             0x00000062, } },
+};
+
+/* result = 10^exponent */
+static void
+BigInt_Pow10(BigInt *result, npy_uint32 exponent)
+{
+    /* create two temporary values to reduce large integer copy operations */
+    BigInt temp1;
+    BigInt temp2;
+    BigInt *curTemp = &temp1;
+    BigInt *pNextTemp = &temp2;
+    npy_uint32 smallExponent;
+    npy_uint32 tableIdx = 0;
+
+    /* make sure the exponent is within the bounds of the lookup table data */
+    DEBUG_ASSERT(exponent < 8192);
+
+    /*
+     * initialize the result by looking up a 32-bit power of 10 corresponding to
+     * the first 3 bits
+     */
+    smallExponent = exponent & 0x7;
+    BigInt_Set_uint32(curTemp, g_PowerOf10_U32[smallExponent]);
+
+    /* remove the low bits that we used for the 32-bit lookup table */
+    exponent >>= 3;
+
+    /* while there are remaining bits in the exponent to be processed */
+    while (exponent != 0) {
+        /* if the current bit is set, multiply by this power of 10 */
+        if (exponent & 1) {
+            BigInt *pSwap;
+
+            /* multiply into the next temporary */
+            BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]);
+
+            /* swap to the next temporary */
+            pSwap = curTemp;
+            curTemp = pNextTemp;
+            pNextTemp = pSwap;
+        }
+
+        /* advance to the next bit */
+        ++tableIdx;
+        exponent >>= 1;
+    }
+
+    /* output the result */
+    BigInt_Copy(result, curTemp);
+}
+
+/* result = in * 10^exponent */
+static void
+BigInt_MultiplyPow10(BigInt *result, const BigInt *in, npy_uint32 exponent)
+{
+
+    /* create two temporary values to reduce large integer copy operations */
+    BigInt temp1;
+    BigInt temp2;
+    BigInt *curTemp = &temp1;
+    BigInt *pNextTemp = &temp2;
+    npy_uint32 smallExponent;
+    npy_uint32 tableIdx = 0;
+
+    /* make sure the exponent is within the bounds of the lookup table data */
+    DEBUG_ASSERT(exponent < 8192);
+
+    /*
+     * initialize the result by looking up a 32-bit power of 10 corresponding to
+     * the first 3 bits
+     */
+    smallExponent = exponent & 0x7;
+    if (smallExponent != 0) {
+        BigInt_Multiply_int(curTemp, in, g_PowerOf10_U32[smallExponent]);
+    }
+    else {
+        BigInt_Copy(curTemp, in);
+    }
+
+    /* remove the low bits that we used for the 32-bit lookup table */
+    exponent >>= 3;
+
+    /* while there are remaining bits in the exponent to be processed */
+    while (exponent != 0) {
+        /* if the current bit is set, multiply by this power of 10 */
+        if (exponent & 1) {
+            BigInt *pSwap;
+
+            /* multiply into the next temporary */
+            BigInt_Multiply(pNextTemp, curTemp, &g_PowerOf10_Big[tableIdx]);
+
+            // swap to the next temporary
+            pSwap = curTemp;
+            curTemp = pNextTemp;
+            pNextTemp = pSwap;
+        }
+
+        /* advance to the next bit */
+        ++tableIdx;
+        exponent >>= 1;
+    }
+
+    /* output the result */
+    BigInt_Copy(result, curTemp);
+}
+
+/* result = 2^exponent */
+static inline void
+BigInt_Pow2(BigInt *result, npy_uint32 exponent)
+{
+    npy_uint32 bitIdx;
+    npy_uint32 blockIdx = exponent / 32;
+    npy_uint32 i;
+
+    DEBUG_ASSERT(blockIdx < c_BigInt_MaxBlocks);
+
+    for (i = 0; i <= blockIdx; ++i) {
+        result->blocks[i] = 0;
+    }
+
+    result->length = blockIdx + 1;
+
+    bitIdx = (exponent % 32);
+    result->blocks[blockIdx] |= (1 << bitIdx);
+}
+
+/*
+ * This function will divide two large numbers under the assumption that the
+ * result is within the range [0,10) and the input numbers have been shifted
+ * to satisfy:
+ * - The highest block of the divisor is greater than or equal to 8 such that
+ *   there is enough precision to make an accurate first guess at the quotient.
+ * - The highest block of the divisor is less than the maximum value on an
+ *   unsigned 32-bit integer such that we can safely increment without overflow.
+ * - The dividend does not contain more blocks than the divisor such that we
+ *   can estimate the quotient by dividing the equivalently placed high blocks.
+ *
+ * quotient  = floor(dividend / divisor)
+ * remainder = dividend - quotient*divisor
+ *
+ * dividend is updated to be the remainder and the quotient is returned.
+ */
+static npy_uint32
+BigInt_DivideWithRemainder_MaxQuotient9(BigInt *dividend, const BigInt *divisor)
+{
+    npy_uint32 length, quotient;
+    const npy_uint32 *finalDivisorBlock;
+    npy_uint32 *finalDividendBlock;
+
+    /*
+     * Check that the divisor has been correctly shifted into range and that it
+     * is not smaller than the dividend in length.
+     */
+    DEBUG_ASSERT(!divisor->length == 0 &&
+                divisor->blocks[divisor->length-1] >= 8 &&
+                divisor->blocks[divisor->length-1] < 0xFFFFFFFF &&
+                dividend->length <= divisor->length);
+
+    /*
+     * If the dividend is smaller than the divisor, the quotient is zero and the
+     * divisor is already the remainder.
+     */
+    length = divisor->length;
+    if (dividend->length < divisor->length) {
+        return 0;
+    }
+
+    finalDivisorBlock = divisor->blocks + length - 1;
+    finalDividendBlock = dividend->blocks + length - 1;
+
+    /*
+     * Compute an estimated quotient based on the high block value. This will
+     * either match the actual quotient or undershoot by one.
+     */
+    quotient = *finalDividendBlock / (*finalDivisorBlock + 1);
+    DEBUG_ASSERT(quotient <= 9);
+
+    /* Divide out the estimated quotient */
+    if (quotient != 0) {
+        /* dividend = dividend - divisor*quotient */
+        const npy_uint32 *divisorCur = divisor->blocks;
+        npy_uint32 *dividendCur = dividend->blocks;
+
+        npy_uint64 borrow = 0;
+        npy_uint64 carry = 0;
+        do {
+            npy_uint64 difference, product;
+
+            product = (npy_uint64)*divisorCur * (npy_uint64)quotient + carry;
+            carry = product >> 32;
+
+            difference = (npy_uint64)*dividendCur
+                       - (product & 0xFFFFFFFF) - borrow;
+            borrow = (difference >> 32) & 1;
+
+            *dividendCur = difference & 0xFFFFFFFF;
+
+            ++divisorCur;
+            ++dividendCur;
+        } while(divisorCur <= finalDivisorBlock);
+
+        /* remove all leading zero blocks from dividend */
+        while (length > 0 && dividend->blocks[length - 1] == 0) {
+            --length;
+        }
+
+        dividend->length = length;
+    }
+
+    /*
+     * If the dividend is still larger than the divisor, we overshot our
+     * estimate quotient. To correct, we increment the quotient and subtract one
+     * more divisor from the dividend.
+     */
+    if (BigInt_Compare(dividend, divisor) >= 0) {
+        /* dividend = dividend - divisor */
+        const npy_uint32 *divisorCur = divisor->blocks;
+        npy_uint32 *dividendCur = dividend->blocks;
+        npy_uint64 borrow = 0;
+
+        ++quotient;
+
+        do {
+            npy_uint64 difference = (npy_uint64)*dividendCur
+                                  - (npy_uint64)*divisorCur - borrow;
+            borrow = (difference >> 32) & 1;
+
+            *dividendCur = difference & 0xFFFFFFFF;
+
+            ++divisorCur;
+            ++dividendCur;
+        } while(divisorCur <= finalDivisorBlock);
+
+        /* remove all leading zero blocks from dividend */
+        while (length > 0 && dividend->blocks[length - 1] == 0) {
+            --length;
+        }
+
+        dividend->length = length;
+    }
+
+    return quotient;
+}
+
+/* result = result << shift */
+static void
+BigInt_ShiftLeft(BigInt *result, npy_uint32 shift)
+{
+    npy_uint32 shiftBlocks = shift / 32;
+    npy_uint32 shiftBits = shift % 32;
+
+    /* process blocks high to low so that we can safely process in place */
+    const npy_uint32 *pInBlocks = result->blocks;
+    npy_int32 inLength = result->length;
+    npy_uint32 *pInCur, *pOutCur;
+
+    DEBUG_ASSERT(inLength + shiftBlocks < c_BigInt_MaxBlocks);
+    DEBUG_ASSERT(shift != 0);
+
+    /* check if the shift is block aligned */
+    if (shiftBits == 0) {
+        npy_uint32 i;
+
+        /* copy blcoks from high to low */
+        for (pInCur = result->blocks + result->length,
+                 pOutCur = pInCur + shiftBlocks;
+                 pInCur >= pInBlocks;
+                 --pInCur, --pOutCur) {
+            *pOutCur = *pInCur;
+        }
+
+        /* zero the remaining low blocks */
+        for (i  = 0; i < shiftBlocks; ++i) {
+            result->blocks[i] = 0;
+        }
+
+        result->length += shiftBlocks;
+    }
+    /* else we need to shift partial blocks */
+    else {
+        npy_uint32 i;
+        npy_int32 inBlockIdx = inLength - 1;
+        npy_uint32 outBlockIdx = inLength + shiftBlocks;
+
+        /* output the initial blocks */
+        const npy_uint32 lowBitsShift = (32 - shiftBits);
+        npy_uint32 highBits = 0;
+        npy_uint32 block = result->blocks[inBlockIdx];
+        npy_uint32 lowBits = block >> lowBitsShift;
+
+        /* set the length to hold the shifted blocks */
+        DEBUG_ASSERT(outBlockIdx < c_BigInt_MaxBlocks);
+        result->length = outBlockIdx + 1;
+
+        while (inBlockIdx > 0) {
+            result->blocks[outBlockIdx] = highBits | lowBits;
+            highBits = block << shiftBits;
+
+            --inBlockIdx;
+            --outBlockIdx;
+
+            block = result->blocks[inBlockIdx];
+            lowBits = block >> lowBitsShift;
+        }
+
+        /* output the final blocks */
+        DEBUG_ASSERT(outBlockIdx == shiftBlocks + 1);
+        result->blocks[outBlockIdx] = highBits | lowBits;
+        result->blocks[outBlockIdx-1] = block << shiftBits;
+
+        /* zero the remaining low blocks */
+        for (i = 0; i < shiftBlocks; ++i) {
+            result->blocks[i] = 0;
+        }
+
+        /* check if the terminating block has no set bits */
+        if (result->blocks[result->length - 1] == 0) {
+            --result->length;
+        }
+    }
+}
+
+typedef enum CutoffMode
+{
+    /*
+     * As many digits as necessary to print a uniquely identifiable number.
+     * cutoffNumber should be -1.
+     */
+    CutoffMode_Unique,
+    /* up to cutoffNumber significant digits */
+    CutoffMode_TotalLength,
+    /* up to cutoffNumber significant digits past the decimal point */
+    CutoffMode_FractionLength,
+    /*
+     * up to cutoffNumber digits, or fewer if the number can be uniquely
+     * identified with fewer
+     */
+    CutoffMode_MaxTotalUnique,
+    /*
+     * up to cutoffNumber digits pas decimal point, or fewer if the number can
+     * be uniquely identified with fewer
+     */
+    CutoffMode_MaxFractionUnique,
+} CutoffMode;
+
+/*
+ * This is an implementation the Dragon4 algorithm to convert a binary number in
+ * floating point format to a decimal number in string format. The function
+ * returns the number of digits written to the output buffer and the output is
+ * not NUL terminated.
+ *
+ * The floating point input value is (mantissa * 2^exponent).
+ *
+ * See the following papers for more information on the algorithm:
+ *  "How to Print Floating-Point Numbers Accurately"
+ *    Steele and White
+ *    http://kurtstephens.com/files/p372-steele.pdf
+ *  "Printing Floating-Point Numbers Quickly and Accurately"
+ *    Burger and Dybvig
+ *    http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656
+ *
+ * This implementation is essentially a port of the "Figure 3" Scheme code from
+ * Burger and Dybvig, but with the following additional differences:
+ *   1. Instead of finding the highest k such that high < B**k, we search
+ *      for the one where v < B**k. This has a downside that if a power
+ *      of 10 exists between v and high, we will output a 9 instead of a 1 as
+ *      first digit, violating the "no-carry" guarantee of the paper. This is
+ *      accounted for in a new post-processing loop which implements a carry
+ *      operation. The upside is one less BigInt multiplication.
+ *   2. The approximate value of k found is offset by a different amount
+ *      (0.69), in order to hit the "fast" branch more often. This is
+ *      extensively described on Ryan Juckett's website.
+ *   3. The fixed precision mode is much simpler than proposed in the paper.
+ *      It simply outputs digits by repeatedly dividing by 10. The new "carry"
+ *      loop at the end rounds this output nicely.
+ *  There is also some new code to account for details of the BigInt
+ *  implementation, which are not present in the paper since it does not specify
+ *  details of the integer calculations.
+ *
+ * There is some more documentation of these changes on Ryan Juckett's website
+ * at http://www.ryanjuckett.com/programming/printing-floating-point-numbers/
+ *
+ * Ryan Juckett's implementation did not implement "IEEE unbiased rounding",
+ * except in the last digit. This has been added back, following the Burger &
+ * Dybvig code, using the isEven variable.
+ *
+ * Arguments:
+ *   * mantissa - value significand
+ *   * exponent - value exponent in base 2
+ *   * mantissaBit - index of the highest set mantissa bit
+ *   * hasUnequalMargins - is the high margin twice as large as the low margin
+ *   * cutoffMode - how to determine output length
+ *   * cutoffNumber - parameter to the selected cutoffMode. For each mode:
+ *   * pOutBuffer - buffer to output into
+ *   * bufferSize - maximum characters that can be printed to pOutBuffer
+ *   * pOutExponent - the base 10 exponent of the first digit
+ */
+static npy_uint32
+Dragon4(const npy_uint64 mantissa, const npy_int32 exponent,
+        const npy_uint32 mantissaBit, const npy_bool hasUnequalMargins,
+        const CutoffMode cutoffMode, npy_int32 cutoffNumber, char *pOutBuffer,
+        npy_uint32 bufferSize, npy_int32 *pOutExponent)
+{
+    char *curDigit = pOutBuffer;
+
+    /*
+     * We compute values in integer format by rescaling as
+     *   mantissa = scaledValue / scale
+     *   marginLow = scaledMarginLow / scale
+     *   marginHigh = scaledMarginHigh / scale
+     * Here, marginLow and marginHigh represent 1/2 of the distance to the next
+     * floating point value above/below the mantissa.
+     *
+     * scaledMarginHigh is a pointer so that it can point to scaledMarginLow in
+     * the case they must be equal to each other, otherwise it will point to
+     * optionalMarginHigh.
+     */
+    BigInt scale;
+    BigInt scaledValue;
+    BigInt scaledMarginLow;
+    BigInt *scaledMarginHigh;
+    BigInt optionalMarginHigh;
+
+    const npy_float64 log10_2 = 0.30102999566398119521373889472449;
+    npy_int32 digitExponent, cutoffExponent, desiredCutoffExponent, hiBlock;
+    npy_uint32 outputDigit;    /* current digit being output */
+    npy_uint32 outputLen;
+    npy_bool isEven = (mantissa % 2) == 0;
+    npy_int32 cmp;
+
+    /* values used to determine how to round */
+    npy_bool low, high, roundDown;
+
+    DEBUG_ASSERT(bufferSize > 0);
+
+    /* if the mantissa is zero, the value is zero regardless of the exponent */
+    if (mantissa == 0) {
+        *curDigit = '0';
+        *pOutExponent = 0;
+        return 1;
+    }
+
+    if (hasUnequalMargins) {
+        /* if we have no fractional component */
+        if (exponent > 0) {
+            /*
+             * 1) Expand the input value by multiplying out the mantissa and
+             *    exponent. This represents the input value in its whole number
+             *    representation.
+             * 2) Apply an additional scale of 2 such that later comparisons
+             *    against the margin values are simplified.
+             * 3) Set the margin value to the lowest mantissa bit's scale.
+             */
+
+            /* scaledValue      = 2 * 2 * mantissa*2^exponent */
+            BigInt_Set_uint64(&scaledValue, mantissa);
+            BigInt_ShiftLeft(&scaledValue, exponent + 2);
+
+            /* scale            = 2 * 2 * 1 */
+            BigInt_Set_uint32(&scale,  4);
+
+            /* scaledMarginLow  = 2 * 2^(exponent-1) */
+            BigInt_Pow2(&scaledMarginLow, exponent);
+
+            /* scaledMarginHigh = 2 * 2 * 2^(exponent-1) */
+            BigInt_Pow2(&optionalMarginHigh, exponent + 1);
+        }
+        /* else we have a fractional exponent */
+        else {
+            /*
+             * In order to track the mantissa data as an integer, we store it as
+             * is with a large scale
+             */
+
+            /* scaledValue      = 2 * 2 * mantissa */
+            BigInt_Set_uint64(&scaledValue, mantissa);
+            BigInt_ShiftLeft(&scaledValue, 2);
+
+            /* scale            = 2 * 2 * 2^(-exponent) */
+            BigInt_Pow2(&scale, -exponent + 2);
+
+            /* scaledMarginLow  = 2 * 2^(-1) */
+            BigInt_Set_uint32(&scaledMarginLow, 1);
+
+            /* scaledMarginHigh = 2 * 2 * 2^(-1) */
+            BigInt_Set_uint32(&optionalMarginHigh, 2);
+        }
+
+        /* the high and low margins are different */
+        scaledMarginHigh = &optionalMarginHigh;
+    }
+    else {
+        /* if we have no fractional component */
+        if (exponent > 0) {
+            /* scaledValue     = 2 * mantissa*2^exponent */
+            BigInt_Set_uint64(&scaledValue, mantissa);
+            BigInt_ShiftLeft(&scaledValue, exponent + 1);
+
+            /* scale           = 2 * 1 */
+            BigInt_Set_uint32(&scale, 2);
+
+            /* scaledMarginLow = 2 * 2^(exponent-1) */
+            BigInt_Pow2(&scaledMarginLow, exponent);
+        }
+        /* else we have a fractional exponent */
+        else {
+            /*
+             * In order to track the mantissa data as an integer, we store it as
+             * is with a large scale
+             */
+
+            /* scaledValue     = 2 * mantissa */
+            BigInt_Set_uint64(&scaledValue, mantissa);
+            BigInt_ShiftLeft(&scaledValue, 1);
+
+            /* scale           = 2 * 2^(-exponent) */
+            BigInt_Pow2(&scale, -exponent + 1);
+
+            /* scaledMarginLow = 2 * 2^(-1) */
+            BigInt_Set_uint32(&scaledMarginLow, 1);
+        }
+
+        /* the high and low margins are equal */
+        scaledMarginHigh = &scaledMarginLow;
+    }
+
+    /*
+     * Compute an estimate for digitExponent that will be correct or undershoot
+     * by one.  This optimization is based on the paper "Printing Floating-Point
+     * Numbers Quickly and Accurately" by Burger and Dybvig
+     * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656
+     * We perform an additional subtraction of 0.69 to increase the frequency of
+     * a failed estimate because that lets us take a faster branch in the code.
+     * 0.69 is chosen because 0.69 + log10(2) is less than one by a reasonable
+     * epsilon that will account for any floating point error.
+     *
+     * We want to set digitExponent to floor(log10(v)) + 1
+     *  v = mantissa*2^exponent
+     *  log2(v) = log2(mantissa) + exponent;
+     *  log10(v) = log2(v) * log10(2)
+     *  floor(log2(v)) = mantissaBit + exponent;
+     *  log10(v) - log10(2) < (mantissaBit + exponent) * log10(2) <= log10(v)
+     *  log10(v) < (mantissaBit + exponent) * log10(2) + log10(2)
+     *                                                 <= log10(v) + log10(2)
+     *  floor(log10(v)) < ceil((mantissaBit + exponent) * log10(2))
+     *                                                 <= floor(log10(v)) + 1
+     */
+    digitExponent = (npy_int32)(
+       ceil((npy_float64)((npy_int32)mantissaBit + exponent) * log10_2 - 0.69));
+
+    /*
+     * if the digit exponent is smaller than the smallest desired digit for
+     * fractional cutoff, pull the digit back into legal range at which point we
+     * will round to the appropriate value.  Note that while our value for
+     * digitExponent is still an estimate, this is safe because it only
+     * increases the number. This will either correct digitExponent to an
+     * accurate value or it will clamp it above the accurate value.
+     */
+    if ( (cutoffMode == CutoffMode_FractionLength ||
+          cutoffMode == CutoffMode_MaxFractionUnique) &&
+            digitExponent <= -cutoffNumber) {
+        digitExponent = -cutoffNumber + 1;
+    }
+
+
+    /* Divide value by 10^digitExponent. */
+    if (digitExponent > 0) {
+        /* A positive exponent creates a division so we multiply the scale. */
+        BigInt temp;
+        BigInt_MultiplyPow10(&temp, &scale, digitExponent);
+        BigInt_Copy(&scale, &temp);
+    }
+    else if (digitExponent < 0) {
+        /*
+         * A negative exponent creates a multiplication so we multiply up the
+         * scaledValue, scaledMarginLow and scaledMarginHigh.
+         */
+        BigInt pow10, temp;
+        BigInt_Pow10(&pow10, -digitExponent);
+
+        BigInt_Multiply(&temp, &scaledValue, &pow10);
+        BigInt_Copy(&scaledValue, &temp);
+
+        BigInt_Multiply(&temp, &scaledMarginLow, &pow10);
+        BigInt_Copy(&scaledMarginLow, &temp);
+
+        if (scaledMarginHigh != &scaledMarginLow) {
+            BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow);
+        }
+    }
+
+    /* If (value >= 1), our estimate for digitExponent was too low */
+    if (BigInt_Compare(&scaledValue, &scale) >= 0) {
+        /*
+         * The exponent estimate was incorrect.
+         * Increment the exponent and don't perform the premultiply needed
+         * for the first loop iteration.
+         */
+        digitExponent = digitExponent + 1;
+    }
+    else {
+        /*
+         * The exponent estimate was correct.
+         * Multiply larger by the output base to prepare for the first loop
+         * iteration.
+         */
+        BigInt_Multiply10(&scaledValue);
+        BigInt_Multiply10(&scaledMarginLow);
+        if (scaledMarginHigh != &scaledMarginLow) {
+            BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow);
+        }
+    }
+
+    /*
+     * Compute the cutoff exponent (the exponent of the final digit to print).
+     * Default to the maximum size of the output buffer.
+     */
+    cutoffExponent = digitExponent - bufferSize;
+    switch(cutoffMode) {
+        /* print digits until we pass the accuracy margin or buffer size */
+        case CutoffMode_Unique:
+            DEBUG_ASSERT(cutoffNumber == -1);
+            break;
+        /* print cutoffNumber of digits or until we reach the buffer size */
+        case CutoffMode_MaxTotalUnique:
+        case CutoffMode_TotalLength:
+            desiredCutoffExponent = digitExponent - cutoffNumber;
+            if (desiredCutoffExponent > cutoffExponent) {
+                cutoffExponent = desiredCutoffExponent;
+            }
+            break;
+        /* print cutoffNumber digits past the decimal point or until we reach
+         * the buffer size
+         */
+        case CutoffMode_MaxFractionUnique:
+        case CutoffMode_FractionLength:
+            desiredCutoffExponent = -cutoffNumber;
+            if (desiredCutoffExponent > cutoffExponent) {
+                cutoffExponent = desiredCutoffExponent;
+            }
+            break;
+    }
+
+    /* Output the exponent of the first digit we will print */
+    *pOutExponent = digitExponent-1;
+
+    /*
+     * In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(), we
+     * need to scale up our values such that the highest block of the
+     * denominator is greater than or equal to 8. We also need to guarantee that
+     * the numerator can never have a length greater than the denominator after
+     * each loop iteration.  This requires the highest block of the denominator
+     * to be less than or equal to 429496729 which is the highest number that
+     * can be multiplied by 10 without overflowing to a new block.
+     */
+    DEBUG_ASSERT(scale.length > 0);
+    hiBlock = scale.blocks[scale.length - 1];
+    if (hiBlock < 8 || hiBlock > 429496729) {
+        npy_uint32 hiBlockLog2, shift;
+
+        /*
+         * Perform a bit shift on all values to get the highest block of the
+         * denominator into the range [8,429496729]. We are more likely to make
+         * accurate quotient estimations in
+         * BigInt_DivideWithRemainder_MaxQuotient9() with higher denominator
+         * values so we shift the denominator to place the highest bit at index
+         * 27 of the highest block.  This is safe because (2^28 - 1) = 268435455
+         * which is less than 429496729. This means that all values with a
+         * highest bit at index 27 are within range.
+         */
+        hiBlockLog2 = LogBase2_32(hiBlock);
+        DEBUG_ASSERT(hiBlockLog2 < 3 || hiBlockLog2 > 27);
+        shift = (32 + 27 - hiBlockLog2) % 32;
+
+        BigInt_ShiftLeft(&scale, shift);
+        BigInt_ShiftLeft(&scaledValue, shift);
+        BigInt_ShiftLeft(&scaledMarginLow, shift);
+        if (scaledMarginHigh != &scaledMarginLow) {
+            BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow);
+        }
+    }
+
+    if (cutoffMode == CutoffMode_Unique ||
+            cutoffMode == CutoffMode_MaxFractionUnique ||
+            cutoffMode == CutoffMode_MaxTotalUnique) {
+        /*
+         * For the unique cutoff mode, we will try to print until we have
+         * reached a level of precision that uniquely distinguishes this value
+         * from its neighbors. If we run out of space in the output buffer, we
+         * terminate early.
+         */
+        for (;;) {
+            BigInt scaledValueHigh;
+
+            digitExponent = digitExponent-1;
+
+            /* divide out the scale to extract the digit */
+            outputDigit =
+                BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, &scale);
+            DEBUG_ASSERT(outputDigit < 10);
+
+            /* update the high end of the value */
+            BigInt_Add(&scaledValueHigh, &scaledValue, scaledMarginHigh);
+
+            /*
+             * stop looping if we are far enough away from our neighboring
+             * values or if we have reached the cutoff digit
+             */
+            cmp = BigInt_Compare(&scaledValue, &scaledMarginLow);
+            low = isEven ? (cmp <= 0) : (cmp < 0);
+            cmp = BigInt_Compare(&scaledValueHigh, &scale);
+            high = isEven ? (cmp >= 0) : (cmp > 0);
+            if (low | high | (digitExponent == cutoffExponent))
+                break;
+
+            /* store the output digit */
+            *curDigit = (char)('0' + outputDigit);
+            ++curDigit;
+
+            /* multiply larger by the output base */
+            BigInt_Multiply10(&scaledValue);
+            BigInt_Multiply10(&scaledMarginLow);
+            if (scaledMarginHigh != &scaledMarginLow) {
+                BigInt_Multiply2(scaledMarginHigh, &scaledMarginLow);
+            }
+        }
+    }
+    else {
+        /*
+         * For length based cutoff modes, we will try to print until we
+         * have exhausted all precision (i.e. all remaining digits are zeros) or
+         * until we reach the desired cutoff digit.
+         */
+        low = NPY_FALSE;
+        high = NPY_FALSE;
+
+        for (;;) {
+            digitExponent = digitExponent-1;
+
+            /* divide out the scale to extract the digit */
+            outputDigit =
+                BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, &scale);
+            DEBUG_ASSERT(outputDigit < 10);
+
+            if ((scaledValue.length == 0) | (digitExponent == cutoffExponent)) {
+                break;
+            }
+
+            /* store the output digit */
+            *curDigit = (char)('0' + outputDigit);
+            ++curDigit;
+
+            /* multiply larger by the output base */
+            BigInt_Multiply10(&scaledValue);
+        }
+    }
+
+    /* default to rounding down the final digit if value got too close to 0 */
+    roundDown = low;
+
+    /* if it is legal to round up and down */
+    if (low == high) {
+        npy_int32 compare;
+
+        /*
+         * round to the closest digit by comparing value with 0.5. To do this we
+         * need to convert the inequality to large integer values.
+         *  compare( value, 0.5 )
+         *  compare( scale * value, scale * 0.5 )
+         *  compare( 2 * scale * value, scale )
+         */
+        BigInt_Multiply2_inplace(&scaledValue);
+        compare = BigInt_Compare(&scaledValue, &scale);
+        roundDown = compare < 0;
+
+        /*
+         * if we are directly in the middle, round towards the even digit (i.e.
+         * IEEE rouding rules)
+         */
+        if (compare == 0) {
+            roundDown = (outputDigit & 1) == 0;
+        }
+    }
+
+    /* print the rounded digit */
+    if (roundDown) {
+        *curDigit = (char)('0' + outputDigit);
+        ++curDigit;
+    }
+    else {
+        /* handle rounding up */
+        if (outputDigit == 9) {
+            /* find the first non-nine prior digit */
+            for (;;) {
+                /* if we are at the first digit */
+                if (curDigit == pOutBuffer) {
+                    /* output 1 at the next highest exponent */
+                    *curDigit = '1';
+                    ++curDigit;
+                    *pOutExponent += 1;
+                    break;
+                }
+
+                --curDigit;
+                if (*curDigit != '9') {
+                    /* increment the digit */
+                    *curDigit += 1;
+                    ++curDigit;
+                    break;
+                }
+            }
+        }
+        else {
+            /* values in the range [0,8] can perform a simple round up */
+            *curDigit = (char)('0' + outputDigit + 1);
+            ++curDigit;
+        }
+    }
+
+    /* return the number of digits output */
+    outputLen = (npy_uint32)(curDigit - pOutBuffer);
+    DEBUG_ASSERT(outputLen <= bufferSize);
+    return outputLen;
+}
+
+
+/*
+ * Helper union to decompose a 16-bit IEEE float.
+ * sign:      1 bit
+ * exponent:  5 bits
+ * mantissa: 10 bits
+ */
+typedef union FloatUnion16
+{
+    npy_uint16 integer;
+} FloatUnion16;
+
+npy_bool   IsNegative_F16(FloatUnion16 *v) { return (v->integer >> 15) != 0; }
+npy_uint32 GetExponent_F16(FloatUnion16 *v) { return (v->integer >> 10) & 0x1F;}
+npy_uint32 GetMantissa_F16(FloatUnion16 *v) { return v->integer & 0x3FF; }
+
+
+/*
+ * Helper union to decompose a 32-bit IEEE float.
+ * sign:      1 bit
+ * exponent:  8 bits
+ * mantissa: 23 bits
+ */
+typedef union FloatUnion32
+{
+    npy_float32 floatingPoint;
+    npy_uint32 integer;
+} FloatUnion32;
+
+npy_bool   IsNegative_F32(FloatUnion32 *v) { return (v->integer >> 31) != 0; }
+npy_uint32 GetExponent_F32(FloatUnion32 *v) { return (v->integer >> 23) & 0xFF;}
+npy_uint32 GetMantissa_F32(FloatUnion32 *v) { return v->integer & 0x7FFFFF; }
+
+/*
+ * Helper union to decompose a 64-bit IEEE float.
+ * sign:      1 bit
+ * exponent: 11 bits
+ * mantissa: 52 bits
+ */
+typedef union FloatUnion64
+{
+    npy_float64 floatingPoint;
+    npy_uint64 integer;
+} FloatUnion64;
+npy_bool   IsNegative_F64(FloatUnion64 *v) { return (v->integer >> 63) != 0; }
+npy_uint32 GetExponent_F64(FloatUnion64 *v) { return (v->integer >> 52) & 0x7FF; }
+npy_uint64 GetMantissa_F64(FloatUnion64 *v) { return v->integer & 0xFFFFFFFFFFFFFull; }
+
+/*
+ * Helper unions and datatype to decompose a 80-bit IEEE float
+ * sign:      1 bit,  second u64
+ * exponent: 15 bits, second u64
+ * intbit     1 bit,  first u64
+ * mantissa: 63 bits, first u64
+ */
+
+/*
+ * Since systems have different types of long doubles, and may not necessarily
+ * have a 128-byte format we can use to pass values around, here we create
+ * our own 128-bit storage type for convenience.
+ */
+typedef struct FloatVal128 {
+    npy_uint64 integer[2];
+} FloatVal128;
+npy_bool   IsNegative_F128(FloatVal128 *v) {
+    return ((v->integer[1] >> 15) & 0x1) != 0;
+}
+npy_uint32 GetExponent_F128(FloatVal128 *v) { return v->integer[1] & 0x7FFF; }
+npy_uint64 GetMantissa_F128(FloatVal128 *v) {
+    return v->integer[0] &  0x7FFFFFFFFFFFFFFFull;
+}
+
+/*
+ * then for each different definition of long double, we create a union to
+ * unpack the float data safely. We can then copy these integers to a
+ * FloatVal128.
+ */
+#ifdef NPY_FLOAT128
+typedef union FloatUnion128
+{
+    npy_float128 floatingPoint;
+    struct {
+        npy_uint64 a;
+        npy_uint16 b;
+    } integer;
+} FloatUnion128;
+#endif
+
+#ifdef NPY_FLOAT96
+typedef union FloatUnion96
+{
+    npy_float96 floatingPoint;
+    struct {
+        npy_uint64 a;
+        npy_uint32 b;
+    } integer;
+} FloatUnion96;
+#endif
+
+#ifdef NPY_FLOAT80
+typedef union FloatUnion80
+{
+    npy_float80 floatingPoint;
+    struct {
+        npy_uint64 a;
+        npy_uint16 b;
+    } integer;
+} FloatUnion80;
+#endif
+
+
+/*
+ * The main changes above this point, relative to Ryan Juckett's code, are:
+ *  1. fixed overflow problems when mantissa was 64 bits (in float128 types),
+ *     by replacing multiplication by 2 or 4 by BigInt_ShiftLeft calls.
+ *  2. Increased c_BigInt_MaxBlocks
+ *  3. Added more entries to the g_PowerOf10_Big table
+ *  4. Added unbiased rounding calculation with isEven
+ *
+ * Below this point, the FormatPositional and FormatScientific functions have
+ * been more significantly rewritten. The Dragon4_PrintFloat16 and
+ * Dragon4_PrintFloat128 functions are new, and were adapted from the 64 and 32
+ * bit versions. The python interfacing functions (in the header) are new.
+ */
+
+
+/*
+ * Outputs the positive number with positional notation: ddddd.dddd
+ * The output is always NUL terminated and the output length (not including the
+ * NUL) is returned.
+ * Arguments:
+ *    buffer - buffer to output into
+ *    bufferSize - maximum characters that can be printed to buffer
+ *    mantissa - value significand
+ *    exponent - value exponent in base 2
+ *    signbit - value of the sign position. Should be '+', '-' or ''
+ *    mantissaBit - index of the highest set mantissa bit
+ *    hasUnequalMargins - is the high margin twice as large as the low margin
+ *    precision - Negative prints as many digits as are needed for a unique
+ *                number. Positive specifies the maximum number of significant
+ *                digits to print past the decimal point.
+ *    trim_mode - how to treat trailing 0s and '.'. See TrimMode comments.
+ *    digits_left - pad characters to left of decimal point. -1 for no padding
+ *    digits_right - pad characters to right of decimal point. -1 for no padding
+ *                   padding adds whitespace until there are the specified
+ *                   number characters to sides of decimal point. Applies after
+ *                   trim_mode characters were removed. If digits_right is
+ *                   positive and the decimal point was trimmed, decimal point
+ *                   will be replaced by a whitespace character.
+ */
+static npy_uint32
+FormatPositional(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
+                 npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
+                 npy_bool hasUnequalMargins, npy_bool unique,
+                 npy_int32 precision, TrimMode trim_mode,
+                 npy_int32 digits_left, npy_int32 digits_right)
+{
+    npy_int32 printExponent;
+    npy_int32 numDigits, numWholeDigits, has_sign=0;
+
+    npy_int32 maxPrintLen = bufferSize - 1, pos = 0;
+    CutoffMode cutoffMode;
+
+    /* track the # of digits past the decimal point that have been printed */
+    npy_int32 numFractionDigits = 0;
+
+    DEBUG_ASSERT(bufferSize > 0);
+
+    if (unique) {
+        if (precision < 0) {
+            cutoffMode = CutoffMode_Unique;
+        }
+        else {
+            cutoffMode = CutoffMode_MaxFractionUnique;
+        }
+    }
+    else {
+        cutoffMode = CutoffMode_FractionLength;
+        DEBUG_ASSERT(precision >= 0);
+    }
+
+    if (signbit == '+' && pos < maxPrintLen) {
+        buffer[pos++] = '+';
+        has_sign = 1;
+    }
+    else if (signbit == '-' && pos < maxPrintLen) {
+        buffer[pos++] = '-';
+        has_sign = 1;
+    }
+
+    numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins,
+                        cutoffMode, precision, buffer + has_sign,
+                        maxPrintLen - has_sign, &printExponent);
+
+    DEBUG_ASSERT(numDigits > 0);
+    DEBUG_ASSERT(numDigits <= bufferSize);
+
+    /* if output has a whole number */
+    if (printExponent >= 0) {
+        /* leave the whole number at the start of the buffer */
+        numWholeDigits = printExponent+1;
+        if (numDigits <= numWholeDigits) {
+            npy_int32 count = numWholeDigits - numDigits;
+            pos += numDigits;
+
+            /* don't overflow the buffer */
+            if (pos + count > maxPrintLen) {
+                count = maxPrintLen - pos;
+            }
+
+            /* add trailing zeros up to the decimal point */
+            numDigits += count;
+            for ( ; count > 0; count--) {
+                buffer[pos++] = '0';
+            }
+        }
+        /* insert the decimal point prior to the fraction */
+        else if (numDigits > (npy_uint32)numWholeDigits) {
+            npy_uint32 maxFractionDigits;
+
+            numFractionDigits = numDigits - numWholeDigits;
+            maxFractionDigits = maxPrintLen - numWholeDigits -1-pos;
+            if (numFractionDigits > maxFractionDigits) {
+                numFractionDigits = maxFractionDigits;
+            }
+
+            memmove(buffer + pos + numWholeDigits + 1,
+                    buffer + pos + numWholeDigits, numFractionDigits);
+            pos += numWholeDigits;
+            buffer[pos] = '.';
+            numDigits = numWholeDigits + 1 + numFractionDigits;
+            pos += 1 + numFractionDigits;
+        }
+    }
+    else {
+        /* shift out the fraction to make room for the leading zeros */
+        npy_uint32 numFractionZeros = 0;
+        if (pos + 2 < maxPrintLen) {
+            npy_uint32 maxFractionZeros, digitsStartIdx, maxFractionDigits, i;
+
+            maxFractionZeros = maxPrintLen - 2 - pos;
+            numFractionZeros = (npy_uint32)-printExponent - 1;
+            if (numFractionZeros > maxFractionZeros) {
+                numFractionZeros = maxFractionZeros;
+            }
+
+            digitsStartIdx = 2 + numFractionZeros;
+
+            /* shift the significant digits right such that there is room for
+             * leading zeros
+             */
+            numFractionDigits = numDigits;
+            maxFractionDigits = maxPrintLen - digitsStartIdx - pos;
+            if (numFractionDigits > maxFractionDigits) {
+                numFractionDigits = maxFractionDigits;
+            }
+
+            memmove(buffer + pos + digitsStartIdx, buffer + pos,
+                    numFractionDigits);
+
+            /* insert the leading zeros */
+            for (i = 2; i < digitsStartIdx; ++i) {
+                buffer[pos + i] = '0';
+            }
+
+            /* update the counts */
+            numFractionDigits += numFractionZeros;
+            numDigits = numFractionDigits;
+        }
+
+        /* add the decimal point */
+        if (pos + 1 < maxPrintLen) {
+            buffer[pos+1] = '.';
+        }
+
+        /* add the initial zero */
+        if (pos < maxPrintLen) {
+            buffer[pos] = '0';
+            numDigits += 1;
+        }
+        numWholeDigits = 1;
+        pos += 2 + numFractionDigits;
+    }
+
+    /* always add decimal point, except for DprZeros mode */
+    if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 &&
+            pos < maxPrintLen){
+        buffer[pos++] = '.';
+    }
+
+    if (trim_mode == TrimMode_LeaveOneZero) {
+        /* if we didn't print any fractional digits, add a trailing 0 */
+        if (numFractionDigits == 0 && pos < maxPrintLen) {
+            buffer[pos++] = '0';
+            numFractionDigits++;
+        }
+    }
+    else if (trim_mode == TrimMode_None &&
+            cutoffMode != CutoffMode_MaxFractionUnique &&
+            precision > (npy_int32)numFractionDigits && pos < maxPrintLen) {
+        /* add trailing zeros up to precision length */
+        /* compute the number of trailing zeros needed */
+        npy_uint32 count = precision - numFractionDigits;
+        if (pos + count > maxPrintLen) {
+            count = maxPrintLen - pos;
+        }
+        numFractionDigits += count;
+
+        for ( ; count > 0; count--) {
+            buffer[pos++] = '0';
+        }
+    }
+    /* else, for trim_mode Zeros or DptZeros, there is nothing more to add */
+
+    /*
+     * when rounding, we may still end up with trailing zeros. Remove them
+     * depending on trim settings.
+     */
+    if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0){
+        while (buffer[pos-1] == '0') {
+            pos--;
+            numFractionDigits--;
+        }
+        if (trim_mode == TrimMode_LeaveOneZero && buffer[pos-1] == '.') {
+            buffer[pos++] = '0';
+            numFractionDigits++;
+        }
+    }
+
+    /* add any whitespace padding to right side */
+    if (digits_right >= numFractionDigits) {
+        npy_uint32 count = digits_right - numFractionDigits;
+
+        /* in trim_mode DptZeros, if right padding, add a space for the . */
+        if (trim_mode == TrimMode_DptZeros && numFractionDigits == 0
+                && pos < maxPrintLen) {
+            buffer[pos++] = ' ';
+        }
+
+        if (pos + count > maxPrintLen) {
+            count = maxPrintLen - pos;
+        }
+
+        for ( ; count > 0; count--) {
+            buffer[pos++] = ' ';
+        }
+    }
+    /* add any whitespace padding to left side */
+    if (digits_left > numWholeDigits + has_sign) {
+        npy_uint32 shift = digits_left - (numWholeDigits + has_sign);
+        npy_uint32 count = pos;
+
+        if (count + shift > maxPrintLen){
+            count = maxPrintLen - shift;
+        }
+
+        if (count > 0) {
+            memmove(buffer + shift, buffer, count);
+        }
+        pos = shift + count;
+        for ( ; shift > 0; shift--) {
+            buffer[shift-1] = ' ';
+        }
+    }
+
+    /* terminate the buffer */
+    DEBUG_ASSERT(pos <= maxPrintLen);
+    buffer[pos] = '\0';
+
+    return pos;
+}
+
+/*
+ * Outputs the positive number with scientific notation: d.dddde[sign]ddd
+ * The output is always NUL terminated and the output length (not including the
+ * NUL) is returned.
+ * Arguments:
+ *    buffer - buffer to output into
+ *    bufferSize - maximum characters that can be printed to buffer
+ *    mantissa - value significand
+ *    exponent - value exponent in base 2
+ *    signbit - value of the sign position. Should be '+', '-' or ''
+ *    mantissaBit - index of the highest set mantissa bit
+ *    hasUnequalMargins - is the high margin twice as large as the low margin
+ *    precision - Negative prints as many digits as are needed for a unique
+ *                number. Positive specifies the maximum number of significant
+ *                digits to print past the decimal point.
+ *    trim_mode - how to treat trailing 0s and '.'. See TrimMode comments.
+ *    digits_left - pad characters to left of decimal point. -1 for no padding
+ *    exp_digits - pads exponent with zeros until it has this many digits
+ */
+static npy_uint32
+FormatScientific (char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
+                  npy_int32 exponent, char signbit, npy_uint32 mantissaBit,
+                  npy_bool hasUnequalMargins, npy_bool unique,
+                  npy_int32 precision, TrimMode trim_mode,
+                  npy_int32 digits_left, npy_int32 exp_digits)
+{
+    npy_int32 printExponent;
+    npy_int32 numDigits;
+    char *pCurOut;
+    npy_int32 numFractionDigits;
+    npy_int32 leftchars;
+    CutoffMode cutoffMode;
+
+    if (unique) {
+        if (precision < 0) {
+            cutoffMode = CutoffMode_Unique;
+        }
+        else {
+            cutoffMode = CutoffMode_MaxTotalUnique;
+        }
+    }
+    else {
+        cutoffMode = CutoffMode_TotalLength;
+        DEBUG_ASSERT(precision >= 0);
+    }
+
+    DEBUG_ASSERT(bufferSize > 0);
+
+    pCurOut = buffer;
+
+    /* add any whitespace padding to left side */
+    leftchars = 1 + (signbit == '-' || signbit == '+');
+    if (digits_left > leftchars) {
+        int i;
+        for (i = 0; i < digits_left - leftchars && bufferSize > 1; i++){
+            *pCurOut = ' ';
+            pCurOut++;
+            --bufferSize;
+        }
+    }
+
+    if (signbit == '+' && bufferSize > 1) {
+        *pCurOut = '+';
+        pCurOut++;
+        --bufferSize;
+    }
+    else if (signbit == '-'  && bufferSize > 1) {
+        *pCurOut = '-';
+        pCurOut++;
+        --bufferSize;
+    }
+
+    numDigits = Dragon4(mantissa, exponent, mantissaBit, hasUnequalMargins,
+                        cutoffMode, precision + 1, pCurOut, bufferSize,
+                        &printExponent);
+
+    DEBUG_ASSERT(numDigits > 0);
+    DEBUG_ASSERT(numDigits <= bufferSize);
+
+    /* keep the whole number as the first digit */
+    if (bufferSize > 1) {
+        pCurOut += 1;
+        bufferSize -= 1;
+    }
+
+    /* insert the decimal point prior to the fractional number */
+    numFractionDigits = numDigits-1;
+    if (numFractionDigits > 0 && bufferSize > 1) {
+        npy_uint32 maxFractionDigits = bufferSize-2;
+        if (numFractionDigits > maxFractionDigits) {
+            numFractionDigits =  maxFractionDigits;
+        }
+
+        memmove(pCurOut + 1, pCurOut, numFractionDigits);
+        pCurOut[0] = '.';
+        pCurOut += (1 + numFractionDigits);
+        bufferSize -= (1 + numFractionDigits);
+    }
+
+    /* always add decimal point, except for DprZeros mode */
+    if (trim_mode != TrimMode_DptZeros && numFractionDigits == 0 &&
+            bufferSize > 1){
+        *pCurOut = '.';
+        ++pCurOut;
+        --bufferSize;
+    }
+
+    if (trim_mode == TrimMode_LeaveOneZero) {
+        /* if we didn't print any fractional digits, add the 0 */
+        if (numFractionDigits == 0 && bufferSize > 1) {
+            *pCurOut = '0';
+            ++pCurOut;
+            --bufferSize;
+            ++numFractionDigits;
+        }
+    }
+    else if (trim_mode == TrimMode_None &&
+            cutoffMode != CutoffMode_MaxTotalUnique) {
+        /* add trailing zeros up to precision length */
+        if (precision > (npy_int32)numFractionDigits) {
+            char *pEnd;
+            /* compute the number of trailing zeros needed */
+            npy_uint32 numZeros = (precision - numFractionDigits);
+            if (numZeros > bufferSize-1) {
+                numZeros = bufferSize-1;
+            }
+
+            for (pEnd = pCurOut + numZeros; pCurOut < pEnd; ++pCurOut) {
+                *pCurOut = '0';
+                ++numFractionDigits;
+            }
+        }
+    }
+    /* else, for trim_mode Zeros or DptZeros, there is nothing more to add */
+
+    /*
+     * when rounding, we may still end up with trailing zeros. Remove them
+     * depending on trim settings.
+     */
+    if (precision >= 0 && trim_mode != TrimMode_None && numFractionDigits > 0){
+        --pCurOut;
+        while (*pCurOut == '0') {
+            --pCurOut;
+            ++bufferSize;
+            --numFractionDigits;
+        }
+        if (trim_mode == TrimMode_LeaveOneZero && *pCurOut == '.') {
+            ++pCurOut;
+            *pCurOut = '0';
+            --bufferSize;
+            ++numFractionDigits;
+        }
+        ++pCurOut;
+    }
+
+    /* print the exponent into a local buffer and copy into output buffer */
+    if (bufferSize > 1) {
+        char exponentBuffer[7];
+        npy_uint32 digits[5];
+        npy_int32 i, exp_size, count;
+
+        if (exp_digits > 5) {
+            exp_digits = 5;
+        }
+        if (exp_digits < 0) {
+            exp_digits = 2;
+        }
+
+        exponentBuffer[0] = 'e';
+        if (printExponent >= 0) {
+            exponentBuffer[1] = '+';
+        }
+        else {
+            exponentBuffer[1] = '-';
+            printExponent = -printExponent;
+        }
+
+        DEBUG_ASSERT(printExponent < 100000);
+
+        /* get exp digits */
+        for (i = 0; i < 5; i++){
+            digits[i] = printExponent % 10;
+            printExponent /= 10;
+        }
+        /* count back over leading zeros */
+        for (i = 5; i > exp_digits && digits[i-1] == 0; i--) {
+        }
+        exp_size = i;
+        /* write remaining digits to tmp buf */
+        for (i = exp_size; i > 0; i--){
+            exponentBuffer[2 + (exp_size-i)] = (char)('0' + digits[i-1]);
+        }
+
+        /* copy the exponent buffer into the output */
+        count = exp_size + 2;
+        if (count > bufferSize-1) {
+            count = bufferSize-1;
+        }
+        memcpy(pCurOut, exponentBuffer, count);
+        pCurOut += count;
+        bufferSize -= count;
+    }
+
+
+    DEBUG_ASSERT(bufferSize > 0);
+    pCurOut[0] = '\0';
+
+    return pCurOut - buffer;
+}
+
+/*
+ * Print a hexadecimal value with a given width.
+ * The output string is always NUL terminated and the string length (not
+ * including the NUL) is returned.
+ */
+/*  Unused for now
+static npy_uint32
+PrintHex(char * buffer, npy_uint32 bufferSize, npy_uint64 value,
+         npy_uint32 width)
+{
+    const char digits[] = "0123456789abcdef";
+    char *pCurOut;
+
+    DEBUG_ASSERT(bufferSize > 0);
+
+    npy_uint32 maxPrintLen = bufferSize-1;
+    if (width > maxPrintLen) {
+        width = maxPrintLen;
+    }
+
+    pCurOut = buffer;
+    while (width > 0) {
+        --width;
+
+        npy_uint8 digit = (npy_uint8)((value >> 4ull*(npy_uint64)width) & 0xF);
+        *pCurOut = digits[digit];
+
+        ++pCurOut;
+    }
+
+    *pCurOut = '\0';
+    return pCurOut - buffer;
+}
+*/
+
+/*
+ * Print special case values for infinities and NaNs.
+ * The output string is always NUL terminated and the string length (not
+ * including the NUL) is returned.
+ */
+static npy_uint32
+PrintInfNan(char *buffer, npy_uint32 bufferSize, npy_uint64 mantissa,
+            npy_uint32 mantissaHexWidth, char signbit)
+{
+    npy_uint32 maxPrintLen = bufferSize-1;
+    npy_uint32 pos = 0;
+
+    DEBUG_ASSERT(bufferSize > 0);
+
+    /* Check for infinity */
+    if (mantissa == 0) {
+        npy_uint32 printLen;
+
+        /* only print sign for inf values (though nan can have a sign set) */
+        if (signbit == '+') {
+            if (pos < maxPrintLen-1){
+                buffer[pos++] = '+';
+            }
+        }
+        else if (signbit == '-') {
+            if (pos < maxPrintLen-1){
+                buffer[pos++] = '-';
+            }
+        }
+
+        /* copy and make sure the buffer is terminated */
+        printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos;
+        memcpy(buffer + pos, "inf", printLen);
+        buffer[pos + printLen] = '\0';
+        return pos + printLen;
+    }
+    else {
+        /* copy and make sure the buffer is terminated */
+        npy_uint32 printLen = (3 < maxPrintLen - pos) ? 3 : maxPrintLen - pos;
+        memcpy(buffer + pos, "nan", printLen);
+        buffer[pos + printLen] = '\0';
+
+        /*
+         * // XXX: Should we change this for numpy?
+         * // append HEX value
+         * if (maxPrintLen > 3) {
+         *     printLen += PrintHex(buffer+3, bufferSize-3, mantissa,
+         *                          mantissaHexWidth);
+         * }
+         */
+
+        return pos + printLen;
+    }
+}
+
+/*
+ * These functions print a floating-point number as a decimal string.
+ * The output string is always NUL terminated and the string length (not
+ * including the NUL) is returned.
+ *
+ * Arguments are:
+ *   buffer - buffer to output into
+ *   bufferSize - maximum characters that can be printed to buffer
+ *   value - value significand
+ *   scientific - boolean controlling whether scientific notation is used
+ *   precision - If positive, specifies the number of decimals to show after
+ *               decimal point. If negative, sufficient digits to uniquely
+ *               specify the float will be output.
+ *   trim_mode - how to treat trailing zeros and decimal point. See TrimMode.
+ *   digits_right - pad the result with '' on the right past the decimal point
+ *   digits_left - pad the result with '' on the right past the decimal point
+ *   exp_digits - Only affects scientific output. If positive, pads the
+ *                exponent with 0s until there are this many digits. If
+ *                negative, only use sufficient digits.
+ */
+static npy_uint32
+Dragon4_PrintFloat16(char *buffer, npy_uint32 bufferSize, npy_uint16 value,
+                     npy_bool scientific, npy_bool unique, npy_int32 precision,
+                     npy_bool sign, TrimMode trim_mode, npy_int32 digits_left,
+                     npy_int32 digits_right, npy_int32 exp_digits)
+{
+    FloatUnion16 floatUnion;
+    npy_uint32 floatExponent, floatMantissa;
+
+    npy_uint32 mantissa;
+    npy_int32 exponent;
+    npy_uint32 mantissaBit;
+    npy_bool hasUnequalMargins;
+    char signbit = '\0';
+
+    if (bufferSize == 0) {
+        return 0;
+    }
+
+    if (bufferSize == 1) {
+        buffer[0] = '\0';
+        return 0;
+    }
+
+    /* deconstruct the floating point value */
+    floatUnion.integer = value;
+    floatExponent = GetExponent_F16(&floatUnion);
+    floatMantissa = GetMantissa_F16(&floatUnion);
+
+    /* output the sign */
+    if (IsNegative_F16(&floatUnion)) {
+        signbit = '-';
+    }
+    else if (sign) {
+        signbit = '+';
+    }
+
+    /* if this is a special value */
+    if (floatExponent == 0x1F) {
+        return PrintInfNan(buffer, bufferSize, floatMantissa, 3, signbit);
+    }
+    /* else this is a number */
+
+    /* factor the value into its parts */
+    if (floatExponent != 0) {
+        /*
+         * normalized
+         * The floating point equation is:
+         *  value = (1 + mantissa/2^10) * 2 ^ (exponent-15)
+         * We convert the integer equation by factoring a 2^10 out of the
+         * exponent
+         *  value = (1 + mantissa/2^10) * 2^10 * 2 ^ (exponent-15-10)
+         *  value = (2^10 + mantissa) * 2 ^ (exponent-15-10)
+         * Because of the implied 1 in front of the mantissa we have 10 bits of
+         * precision.
+         *   m = (2^10 + mantissa)
+         *   e = (exponent-15-10)
+         */
+        mantissa            = (1UL << 10) | floatMantissa;
+        exponent            = floatExponent - 15 - 10;
+        mantissaBit         = 10;
+        hasUnequalMargins   = (floatExponent != 1) && (floatMantissa == 0);
+    }
+    else {
+        /*
+         * denormalized
+         * The floating point equation is:
+         *  value = (mantissa/2^10) * 2 ^ (1-15)
+         * We convert the integer equation by factoring a 2^23 out of the
+         * exponent
+         *  value = (mantissa/2^10) * 2^10 * 2 ^ (1-15-10)
+         *  value = mantissa * 2 ^ (1-15-10)
+         * We have up to 10 bits of precision.
+         *   m = (mantissa)
+         *   e = (1-15-10)
+         */
+        mantissa           = floatMantissa;
+        exponent           = 1 - 15 - 10;
+        mantissaBit        = LogBase2_32(mantissa);
+        hasUnequalMargins  = NPY_FALSE;
+    }
+
+    /* format the value */
+    if (scientific) {
+        return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit,
+                                mantissaBit, hasUnequalMargins, unique,
+                                precision, trim_mode, digits_left, exp_digits);
+    }
+    else {
+        return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit,
+                                mantissaBit, hasUnequalMargins, unique,
+                                precision, trim_mode,
+                                digits_left, digits_right);
+    }
+}
+
+static npy_uint32
+Dragon4_PrintFloat32(char *buffer, npy_uint32 bufferSize, npy_float32 value,
+                     npy_bool scientific, npy_bool unique, npy_int32 precision,
+                     npy_bool sign, TrimMode trim_mode, npy_int32 digits_left,
+                     npy_int32 digits_right, npy_int32 exp_digits)
+{
+    FloatUnion32 floatUnion;
+    npy_uint32 floatExponent, floatMantissa;
+
+    npy_uint32 mantissa;
+    npy_int32 exponent;
+    npy_uint32 mantissaBit;
+    npy_bool hasUnequalMargins;
+    char signbit = '\0';
+
+    if (bufferSize == 0) {
+        return 0;
+    }
+
+    if (bufferSize == 1) {
+        buffer[0] = '\0';
+        return 0;
+    }
+
+    /* deconstruct the floating point value */
+    floatUnion.floatingPoint = value;
+    floatExponent = GetExponent_F32(&floatUnion);
+    floatMantissa = GetMantissa_F32(&floatUnion);
+
+    /* output the sign */
+    if (IsNegative_F32(&floatUnion)) {
+        signbit = '-';
+    }
+    else if (sign) {
+        signbit = '+';
+    }
+
+    /* if this is a special value */
+    if (floatExponent == 0xFF) {
+        return PrintInfNan(buffer, bufferSize, floatMantissa, 6, signbit);
+    }
+    /* else this is a number */
+
+    /* factor the value into its parts */
+    if (floatExponent != 0) {
+        /*
+         * normalized
+         * The floating point equation is:
+         *  value = (1 + mantissa/2^23) * 2 ^ (exponent-127)
+         * We convert the integer equation by factoring a 2^23 out of the
+         * exponent
+         *  value = (1 + mantissa/2^23) * 2^23 * 2 ^ (exponent-127-23)
+         *  value = (2^23 + mantissa) * 2 ^ (exponent-127-23)
+         * Because of the implied 1 in front of the mantissa we have 24 bits of
+         * precision.
+         *   m = (2^23 + mantissa)
+         *   e = (exponent-127-23)
+         */
+        mantissa            = (1UL << 23) | floatMantissa;
+        exponent            = floatExponent - 127 - 23;
+        mantissaBit         = 23;
+        hasUnequalMargins   = (floatExponent != 1) && (floatMantissa == 0);
+    }
+    else {
+        /*
+         * denormalized
+         * The floating point equation is:
+         *  value = (mantissa/2^23) * 2 ^ (1-127)
+         * We convert the integer equation by factoring a 2^23 out of the
+         * exponent
+         *  value = (mantissa/2^23) * 2^23 * 2 ^ (1-127-23)
+         *  value = mantissa * 2 ^ (1-127-23)
+         * We have up to 23 bits of precision.
+         *   m = (mantissa)
+         *   e = (1-127-23)
+         */
+        mantissa           = floatMantissa;
+        exponent           = 1 - 127 - 23;
+        mantissaBit        = LogBase2_32(mantissa);
+        hasUnequalMargins  = NPY_FALSE;
+    }
+
+    /* format the value */
+    if (scientific) {
+        return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit,
+                                mantissaBit, hasUnequalMargins, unique,
+                                precision, trim_mode, digits_left, exp_digits);
+    }
+    else {
+        return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit,
+                                mantissaBit, hasUnequalMargins, unique,
+                                precision, trim_mode,
+                                digits_left, digits_right);
+    }
+}
+
+static npy_uint32
+Dragon4_PrintFloat64(char *buffer, npy_uint32 bufferSize, npy_float64 value,
+                     npy_bool scientific, npy_bool unique, npy_int32 precision,
+                     npy_bool sign, TrimMode trim_mode, npy_int32 digits_left,
+                     npy_int32 digits_right, npy_int32 exp_digits)
+{
+    FloatUnion64 floatUnion;
+    npy_uint32 floatExponent;
+    npy_uint64 floatMantissa;
+
+    npy_uint64 mantissa;
+    npy_int32 exponent;
+    npy_uint32 mantissaBit;
+    npy_bool hasUnequalMargins;
+    char signbit = '\0';
+
+    if (bufferSize == 0) {
+        return 0;
+    }
+
+    if (bufferSize == 1) {
+        buffer[0] = '\0';
+        return 0;
+    }
+
+    /* deconstruct the floating point value */
+    floatUnion.floatingPoint = value;
+    floatExponent = GetExponent_F64(&floatUnion);
+    floatMantissa = GetMantissa_F64(&floatUnion);
+
+    /* output the sign */
+    if (IsNegative_F64(&floatUnion)) {
+        signbit = '-';
+    }
+    else if (sign) {
+        signbit = '+';
+    }
+
+    /* if this is a special value */
+    if (floatExponent == 0x7FF) {
+        return PrintInfNan(buffer, bufferSize, floatMantissa, 13, signbit);
+    }
+    /* else this is a number */
+
+    /* factor the value into its parts */
+    if (floatExponent != 0) {
+        /*
+         * normal
+         * The floating point equation is:
+         *  value = (1 + mantissa/2^52) * 2 ^ (exponent-1023)
+         * We convert the integer equation by factoring a 2^52 out of the
+         * exponent
+         *  value = (1 + mantissa/2^52) * 2^52 * 2 ^ (exponent-1023-52)
+         *  value = (2^52 + mantissa) * 2 ^ (exponent-1023-52)
+         * Because of the implied 1 in front of the mantissa we have 53 bits of
+         * precision.
+         *   m = (2^52 + mantissa)
+         *   e = (exponent-1023+1-53)
+         */
+        mantissa            = (1ull << 52) | floatMantissa;
+        exponent            = floatExponent - 1023 - 52;
+        mantissaBit         = 52;
+        hasUnequalMargins   = (floatExponent != 1) && (floatMantissa == 0);
+    }
+    else {
+        /*
+         * subnormal
+         * The floating point equation is:
+         *  value = (mantissa/2^52) * 2 ^ (1-1023)
+         * We convert the integer equation by factoring a 2^52 out of the
+         * exponent
+         *  value = (mantissa/2^52) * 2^52 * 2 ^ (1-1023-52)
+         *  value = mantissa * 2 ^ (1-1023-52)
+         * We have up to 52 bits of precision.
+         *   m = (mantissa)
+         *   e = (1-1023-52)
+         */
+        mantissa            = floatMantissa;
+        exponent            = 1 - 1023 - 52;
+        mantissaBit         = LogBase2_64(mantissa);
+        hasUnequalMargins   = NPY_FALSE;
+    }
+
+    /* format the value */
+    if (scientific) {
+        return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit,
+                                mantissaBit, hasUnequalMargins, unique,
+                                precision, trim_mode, digits_left, exp_digits);
+    }
+    else {
+        return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit,
+                                mantissaBit, hasUnequalMargins, unique,
+                                precision, trim_mode,
+                                digits_left, digits_right);
+    }
+}
+
+static npy_uint32
+Dragon4_PrintFloat128(char *buffer, npy_uint32 bufferSize, FloatVal128 value,
+                      npy_bool scientific, npy_bool unique, npy_int32 precision,
+                      npy_bool sign, TrimMode trim_mode, npy_int32 digits_left,
+                      npy_int32 digits_right, npy_int32 exp_digits)
+{
+    npy_uint32 floatExponent;
+    npy_uint64 floatMantissa;
+
+    npy_uint64 mantissa;
+    npy_int32 exponent;
+    npy_uint32 mantissaBit;
+    npy_bool hasUnequalMargins;
+    char signbit = '\0';
+
+    if (bufferSize == 0) {
+        return 0;
+    }
+
+    if (bufferSize == 1) {
+        buffer[0] = '\0';
+        return 0;
+    }
+
+    /* deconstruct the floating point value */
+    floatExponent = GetExponent_F128(&value);
+    floatMantissa = GetMantissa_F128(&value);
+
+    /* output the sign */
+    if (IsNegative_F128(&value)) {
+        signbit = '-';
+    }
+    else if (sign) {
+        signbit = '+';
+    }
+
+    /* if this is a special value */
+    if (floatExponent == 0x7FFF) {
+        return PrintInfNan(buffer, bufferSize, floatMantissa, 16, signbit);
+    }
+    /* else this is a number */
+
+    /* factor the value into its parts */
+    if (floatExponent != 0) {
+        /*
+         * normal
+         * The floating point equation is:
+         *  value = (1 + mantissa/2^63) * 2 ^ (exponent-16383)
+         * We convert the integer equation by factoring a 2^63 out of the
+         * exponent
+         *  value = (1 + mantissa/2^63) * 2^63 * 2 ^ (exponent-16383-63)
+         *  value = (2^63 + mantissa) * 2 ^ (exponent-16383-63)
+         * Because of the implied 1 in front of the mantissa we have 64 bits of
+         * precision.
+         *   m = (2^63 + mantissa)
+         *   e = (exponent-16383+1-64)
+         */
+        mantissa            = (1ull << 63) | floatMantissa;
+        exponent            = floatExponent - 16383 - 63;
+        mantissaBit         = 63;
+        hasUnequalMargins   = (floatExponent != 1) && (floatMantissa == 0);
+    }
+    else {
+        /*
+         * subnormal
+         * The floating point equation is:
+         *  value = (mantissa/2^63) * 2 ^ (1-16383)
+         * We convert the integer equation by factoring a 2^52 out of the
+         * exponent
+         *  value = (mantissa/2^63) * 2^52 * 2 ^ (1-16383-63)
+         *  value = mantissa * 2 ^ (1-16383-63)
+         * We have up to 63 bits of precision.
+         *   m = (mantissa)
+         *   e = (1-16383-63)
+         */
+        mantissa            = floatMantissa;
+        exponent            = 1 - 16383 - 63;
+        mantissaBit         = LogBase2_64(mantissa);
+        hasUnequalMargins   = NPY_FALSE;
+    }
+
+    /* format the value */
+    if (scientific) {
+        return FormatScientific(buffer, bufferSize, mantissa, exponent, signbit,
+                                mantissaBit, hasUnequalMargins, unique,
+                                precision, trim_mode, digits_left, exp_digits);
+    }
+    else {
+        return FormatPositional(buffer, bufferSize, mantissa, exponent, signbit,
+                                mantissaBit, hasUnequalMargins, unique,
+                                precision, trim_mode,
+                                digits_left, digits_right);
+    }
+}
+
+PyObject *
+Dragon4_Positional_AnySize(void *val, size_t size, npy_bool unique,
+                           int precision, int sign, TrimMode trim,
+                           int pad_left, int pad_right)
+{
+    /* 
+     * Use a very large buffer in case anyone tries to output a large numberG.
+     * 16384 should be enough to uniquely print any float128, which goes up
+     * to about 10^4932 */
+    static char repr[16384];
+    FloatVal128 val128;
+#ifdef NPY_FLOAT80
+    FloatUnion80 buf80;;
+#endif
+#ifdef NPY_FLOAT96
+    FloatUnion96 buf96;
+#endif
+#ifdef NPY_FLOAT128
+    FloatUnion128 buf128;
+#endif
+
+    switch (size) {
+        case 2:
+            Dragon4_PrintFloat16(repr, sizeof(repr), *(npy_float16*)val,
+                     0, unique, precision, sign, trim, pad_left, pad_right, -1);
+            break;
+        case 4:
+            Dragon4_PrintFloat32(repr, sizeof(repr), *(npy_float32*)val,
+                     0, unique, precision, sign, trim, pad_left, pad_right, -1);
+            break;
+        case 8:
+            Dragon4_PrintFloat64(repr, sizeof(repr), *(npy_float64*)val,
+                     0, unique, precision, sign, trim, pad_left, pad_right, -1);
+            break;
+#ifdef NPY_FLOAT80
+        case 10:
+            buf80.floatingPoint = *(npy_float80*)val;
+            val128.integer[0] = buf80.integer.a;
+            val128.integer[1] = buf80.integer.b;
+            Dragon4_PrintFloat128(repr, sizeof(repr), val128,
+                     0, unique, precision, sign, trim, pad_left, pad_right, -1);
+            break;
+#endif
+#ifdef NPY_FLOAT96
+        case 12:
+            buf96.floatingPoint = *(npy_float96*)val;
+            val128.integer[0] = buf96.integer.a;
+            val128.integer[1] = buf96.integer.b;
+            Dragon4_PrintFloat128(repr, sizeof(repr), val128,
+                     0, unique, precision, sign, trim, pad_left, pad_right, -1);
+            break;
+#endif
+#ifdef NPY_FLOAT128
+        case 16:
+            buf128.floatingPoint = *(npy_float128*)val;
+            val128.integer[0] = buf128.integer.a;
+            val128.integer[1] = buf128.integer.b;
+            Dragon4_PrintFloat128(repr, sizeof(repr), val128,
+                     0, unique, precision, sign, trim, pad_left, pad_right, -1);
+            break;
+#endif
+        default:
+            PyErr_Format(PyExc_ValueError, "unexpected itemsize %zu", size);
+            return NULL;
+    }
+
+    return PyUString_FromString(repr);
+}
+
+PyObject *
+Dragon4_Positional(PyObject *obj, npy_bool unique, int precision, int sign,
+                   TrimMode trim, int pad_left, int pad_right)
+{
+    double val;
+
+    if (PyArray_IsScalar(obj, Half)) {
+        npy_half x = ((PyHalfScalarObject *)obj)->obval;
+        return Dragon4_Positional_AnySize(&x, sizeof(npy_half), unique,
+                                    precision, sign, trim, pad_left, pad_right);
+    }
+    else if (PyArray_IsScalar(obj, Float)) {
+        npy_float x = ((PyFloatScalarObject *)obj)->obval;
+        return Dragon4_Positional_AnySize(&x, sizeof(npy_float), unique,
+                                    precision, sign, trim, pad_left, pad_right);
+    }
+    else if (PyArray_IsScalar(obj, Double)) {
+        npy_double x = ((PyDoubleScalarObject *)obj)->obval;
+        return Dragon4_Positional_AnySize(&x, sizeof(npy_double), unique,
+                                    precision, sign, trim, pad_left, pad_right);
+    }
+    else if (PyArray_IsScalar(obj, LongDouble)) {
+        npy_longdouble x = ((PyLongDoubleScalarObject *)obj)->obval;
+        return Dragon4_Positional_AnySize(&x, sizeof(npy_longdouble), unique,
+                                    precision, sign, trim, pad_left, pad_right);
+    }
+
+    val = PyFloat_AsDouble(obj);
+    if (PyErr_Occurred()) {
+        return NULL;
+    }
+    return Dragon4_Positional_AnySize(&val, sizeof(double), unique,
+                               precision, sign, trim, pad_left, pad_right);
+}
+
+PyObject *
+Dragon4_Scientific_AnySize(void *val, size_t size, npy_bool unique,
+                           int precision, int sign, TrimMode trim,
+                           int pad_left, int exp_digits)
+{
+    /* use a very large buffer in case anyone tries to output a large precision */
+    static char repr[4096];
+    FloatVal128 val128;
+#ifdef NPY_FLOAT80
+    FloatUnion80 buf80;;
+#endif
+#ifdef NPY_FLOAT96
+    FloatUnion96 buf96;
+#endif
+#ifdef NPY_FLOAT128
+    FloatUnion128 buf128;
+#endif
+
+
+    switch (size) {
+        case 2:
+            Dragon4_PrintFloat16(repr, sizeof(repr), *(npy_float16*)val,
+                    1, unique, precision, sign, trim, pad_left, -1, exp_digits);
+            break;
+        case 4:
+            Dragon4_PrintFloat32(repr, sizeof(repr), *(npy_float32*)val,
+                    1, unique, precision, sign, trim, pad_left, -1, exp_digits);
+            break;
+        case 8:
+            Dragon4_PrintFloat64(repr, sizeof(repr), *(npy_float64*)val,
+                    1, unique, precision, sign, trim, pad_left, -1, exp_digits);
+            break;
+#ifdef NPY_FLOAT80
+        case 10:
+            buf80.floatingPoint = *(npy_float80*)val;
+            val128.integer[0] = buf80.integer.a;
+            val128.integer[1] = buf80.integer.b;
+            Dragon4_PrintFloat128(repr, sizeof(repr), val128,
+                    1, unique, precision, sign, trim, pad_left, -1, exp_digits);
+            break;
+#endif
+#ifdef NPY_FLOAT96
+        case 12:
+            buf96.floatingPoint = *(npy_float96*)val;
+            val128.integer[0] = buf96.integer.a;
+            val128.integer[1] = buf96.integer.b;
+            Dragon4_PrintFloat128(repr, sizeof(repr), val128,
+                    1, unique, precision, sign, trim, pad_left, -1, exp_digits);
+            break;
+#endif
+#ifdef NPY_FLOAT128
+        case 16:
+            buf128.floatingPoint = *(npy_float128*)val;
+            val128.integer[0] = buf128.integer.a;
+            val128.integer[1] = buf128.integer.b;
+            Dragon4_PrintFloat128(repr, sizeof(repr), val128,
+                    1, unique, precision, sign, trim, pad_left, -1, exp_digits);
+            break;
+#endif
+        default:
+            PyErr_Format(PyExc_ValueError, "unexpected itemsize %zu", size);
+            return NULL;
+    }
+
+    return PyUString_FromString(repr);
+}
+
+PyObject *
+Dragon4_Scientific(PyObject *obj, npy_bool unique, int precision, int sign,
+                   TrimMode trim, int pad_left, int exp_digits)
+{
+    double val;
+
+    if (PyArray_IsScalar(obj, Half)) {
+        npy_half x = ((PyHalfScalarObject *)obj)->obval;
+        return Dragon4_Scientific_AnySize(&x, sizeof(npy_half), unique,
+                                   precision, sign, trim, pad_left, exp_digits);
+    }
+    else if (PyArray_IsScalar(obj, Float)) {
+        npy_float x = ((PyFloatScalarObject *)obj)->obval;
+        return Dragon4_Scientific_AnySize(&x, sizeof(npy_float), unique,
+                                   precision, sign, trim, pad_left, exp_digits);
+    }
+    else if (PyArray_IsScalar(obj, Double)) {
+        npy_double x = ((PyDoubleScalarObject *)obj)->obval;
+        return Dragon4_Scientific_AnySize(&x, sizeof(npy_double), unique,
+                                   precision, sign, trim, pad_left, exp_digits);
+    }
+    else if (PyArray_IsScalar(obj, LongDouble)) {
+        npy_longdouble x = ((PyLongDoubleScalarObject *)obj)->obval;
+        return Dragon4_Scientific_AnySize(&x, sizeof(npy_longdouble), unique,
+                                   precision, sign, trim, pad_left, exp_digits);
+    }
+
+    val = PyFloat_AsDouble(obj);
+    if (PyErr_Occurred()) {
+        return NULL;
+    }
+    return Dragon4_Scientific_AnySize(&val, sizeof(double), unique,
+                               precision, sign, trim, pad_left, exp_digits);
+}
diff --git a/numpy/core/src/multiarray/dragon4.h b/numpy/core/src/multiarray/dragon4.h
new file mode 100644
index 000000000000..814c84a2f1f0
--- /dev/null
+++ b/numpy/core/src/multiarray/dragon4.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2014 Ryan Juckett
+ * http://www.ryanjuckett.com/
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source
+ *    distribution.
+ */
+
+/*
+ * This file contains a modified version of Ryan Juckett's Dragon4
+ * implementation, which has been ported from C++ to C and which has
+ * modifications specific to printing floats in numpy.
+ */
+
+#ifndef _NPY_DRAGON4_H_
+#define _NPY_DRAGON4_H_
+
+#include "Python.h"
+#include "structmember.h"
+#define NPY_NO_DEPRECATED_API NPY_API_VERSION
+#define _MULTIARRAYMODULE
+#include "numpy/arrayobject.h"
+#include "npy_config.h"
+#include "npy_pycompat.h"
+#include "numpy/arrayscalars.h"
+
+typedef enum TrimMode
+{
+    TrimMode_None,         /* don't trim zeros, always leave a decimal point */
+    TrimMode_LeaveOneZero, /* trim all but the zero before the decimal point */
+    TrimMode_Zeros,        /* trim all trailing zeros, leave decimal point */
+    TrimMode_DptZeros,     /* trim trailing zeros & trailing decimal point */
+} TrimMode;
+
+PyObject *
+Dragon4_Positional_AnySize(void *val, size_t size, npy_bool unique,
+                           int precision, int sign, TrimMode trim,
+                           int pad_left, int pad_right);
+
+PyObject *
+Dragon4_Scientific_AnySize(void *val, size_t size, npy_bool unique,
+                           int precision, int sign, TrimMode trim,
+                           int pad_left, int exp_digits);
+
+PyObject *
+Dragon4_Positional(PyObject *obj, npy_bool unique, int precision, int sign,
+                   TrimMode trim, int pad_left, int pad_right);
+
+PyObject *
+Dragon4_Scientific(PyObject *obj, npy_bool unique, int precision, int sign,
+                   TrimMode trim, int pad_left, int exp_digits);
+
+#endif
+
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 499ec343c392..9ee53362ee09 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -37,6 +37,7 @@ NPY_NO_EXPORT int NPY_NUMUSERTYPES = 0;
 #include "arrayobject.h"
 #include "hashdescr.h"
 #include "descriptor.h"
+#include "dragon4.h"
 #include "calculation.h"
 #include "number.h"
 #include "scalartypes.h"
@@ -3583,14 +3584,156 @@ as_buffer(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
 
 #undef _test_code
 
+
+/*
+ * Prints floating-point scalars usign the Dragon4 algorithm, scientific mode.
+ * Arguments:
+ *  x - a numpy scalar of Floating type
+ *  precision - number of fractional digits to show. In unique mode, can be
+ *              ommited and the unique repr will be returned, otherwise the
+ *              unique value will be truncated to this number of digits
+ *              (breaking the uniqueness guarantee). In fixed mode, is
+ *              required, and specifies the number of fractional digits to
+ *              print.
+ *  unique - whether to use unique (default) or fixed mode.
+ *  sign - whether to show the sign for positive values. Default False
+ *  trim - one of 'k', '.', '0', '-' to control trailing digits, as follows:
+ *          k : don't trim zeros, always leave a decimal point
+ *          . : trim all but the zero before the decimal point
+ *          0 : trim all trailing zeros, leave decimal point
+ *          - : trim trailing zeros and a trailing decimal point
+ *         Default is k.
+ *  pad_left - pads left side of string with whitespace until at least
+ *             this many characters are to the left of the decimal point. If
+ *             -1, don't add any padding. Default -1.
+ *  exp_digits - exponent will contain at least this many digits, padding
+ *               with 0 if necessary. -1 means pad to 2. Maximum of 5.
+ */
+static PyObject *
+dragon4_scientific(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
+{
+    PyObject *obj;
+    static char *kwlist[] = {"x", "precision", "unique", "sign", "trim",
+                             "pad_left", "exp_digits", NULL};
+    int precision=-1, pad_left=-1, exp_digits=-1;
+    char *trimstr=NULL;
+    TrimMode trim = TrimMode_None;
+    int sign=0, unique=1;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iiisii", kwlist,
+                &obj, &precision, &unique, &sign, &trimstr, &pad_left,
+                &exp_digits)) {
+        return NULL;
+    }
+
+    if (trimstr != NULL) {
+        if (strcmp(trimstr, "k") == 0) {
+            trim = TrimMode_None;
+        }
+        else if (strcmp(trimstr, ".") == 0) {
+            trim = TrimMode_Zeros;
+        }
+        else if (strcmp(trimstr, "0") == 0) {
+            trim = TrimMode_LeaveOneZero;
+        }
+        else if (strcmp(trimstr, "-") == 0) {
+            trim = TrimMode_DptZeros;
+        }
+        else {
+            PyErr_SetString(PyExc_TypeError,
+                "if supplied, trim must be 'k', '.', '0' or '-'");
+            return NULL;
+        }
+    }
+
+    if (unique == 0 && precision < 0) {
+        PyErr_SetString(PyExc_TypeError,
+            "in non-unique mode `precision` must be supplied");
+        return NULL;
+    }
+
+    return Dragon4_Scientific(obj, unique, precision, sign,
+                              trim, pad_left, exp_digits);
+}
+
+/*
+ * Prints floating-point scalars usign the Dragon4 algorithm, positional mode.
+ * Arguments:
+ *  x - a numpy scalar of Floating type
+ *  precision - number of fractional digits to show. In unique mode, can be
+ *              ommited and the unique repr will be returned, otherwise the
+ *              unique value will be truncated to this number of digits
+ *              (breaking the uniqueness guarantee). In fixed mode, is
+ *              required, and specifies the number of fractional digits to
+ *              print.
+ *  unique - whether to use unique (default) or fixed mode.
+ *  sign - whether to show the sign for positive values. Default False
+ *  trim - one of 'k', '.', '0', '-' to control trailing digits, as follows:
+ *          k : don't trim zeros, always leave a decimal point
+ *          . : trim all but the zero before the decimal point
+ *          0 : trim all trailing zeros, leave decimal point
+ *          - : trim trailing zeros and a trailing decimal point
+ *         Default is k.
+ *  pad_left - pads left side of string with whitespace until at least
+ *             this many characters are to the left of the decimal point. If
+ *             -1, don't add any padding. Default -1.
+ *  pad_right - pads right side of string with whitespace until at least
+ *             this many characters are to the right of the decimal point. If
+ *             -1, don't add any padding. Default -1.
+ */
+static PyObject *
+dragon4_positional(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
+{
+    PyObject *obj;
+    static char *kwlist[] = {"x", "precision", "unique", "sign", "trim",
+                             "pad_left", "pad_right", NULL};
+    int precision=-1, pad_left=-1, pad_right=-1;
+    char *trimstr=NULL;
+    TrimMode trim = TrimMode_None;
+    int sign=0, unique=1;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iiisii", kwlist,
+                &obj, &precision, &unique, &sign, &trimstr, &pad_left,
+                &pad_right)) {
+        return NULL;
+    }
+
+    if (trimstr != NULL) {
+        if (strcmp(trimstr, "k") == 0) {
+            trim = TrimMode_None;
+        }
+        else if (strcmp(trimstr, ".") == 0) {
+            trim = TrimMode_Zeros;
+        }
+        else if (strcmp(trimstr, "0") == 0) {
+            trim = TrimMode_LeaveOneZero;
+        }
+        else if (strcmp(trimstr, "-") == 0) {
+            trim = TrimMode_DptZeros;
+        }
+        else {
+            PyErr_SetString(PyExc_TypeError,
+                "if supplied, trim must be 'k', '.', '0' or '-'");
+            return NULL;
+        }
+    }
+
+    if (unique == 0 && precision < 0) {
+        PyErr_SetString(PyExc_TypeError,
+            "in non-unique mode `precision` must be supplied");
+        return NULL;
+    }
+
+    return Dragon4_Positional(obj, unique, precision, sign,
+                              trim, pad_left, pad_right);
+}
+
 static PyObject *
 format_longfloat(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
 {
     PyObject *obj;
     unsigned int precision;
-    npy_longdouble x;
     static char *kwlist[] = {"x", "precision", NULL};
-    static char repr[100];
 
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OI:format_longfloat", kwlist,
                 &obj, &precision)) {
@@ -3601,12 +3744,8 @@ format_longfloat(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
                 "not a longfloat");
         return NULL;
     }
-    x = ((PyLongDoubleScalarObject *)obj)->obval;
-    if (precision > 70) {
-        precision = 70;
-    }
-    format_longdouble(repr, 100, x, precision);
-    return PyUString_FromString(repr);
+    return Dragon4_Scientific(obj, precision, 0, 1, TrimMode_LeaveOneZero,
+                              -1, -1);
 }
 
 static PyObject *
@@ -4289,6 +4428,12 @@ static struct PyMethodDef array_module_methods[] = {
     {"format_longfloat",
         (PyCFunction)format_longfloat,
         METH_VARARGS | METH_KEYWORDS, NULL},
+    {"dragon4_positional",
+        (PyCFunction)dragon4_positional,
+        METH_VARARGS | METH_KEYWORDS, NULL},
+    {"dragon4_scientific",
+        (PyCFunction)dragon4_scientific,
+        METH_VARARGS | METH_KEYWORDS, NULL},
     {"compare_chararrays",
         (PyCFunction)compare_chararrays,
         METH_VARARGS | METH_KEYWORDS, NULL},
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 87d29bf0426f..a208c2f732be 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -26,6 +26,7 @@
 #include "datetime_strings.h"
 #include "alloc.h"
 #include "npy_import.h"
+#include "dragon4.h"
 
 #include <stdlib.h>
 
@@ -429,146 +430,29 @@ gentype_format(PyObject *self, PyObject *args)
 #endif
 
 /**begin repeat
- * #name = float, double, longdouble#
- * #NAME = FLOAT, DOUBLE, LONGDOUBLE#
- * #type = npy_float, npy_double, npy_longdouble#
- * #suff = f, d, l#
+ * #name = half, float, double, longdouble#
+ * #NAME = HALF, FLOAT, DOUBLE, LONGDOUBLE#
+ * #type = npy_half, npy_float, npy_double, npy_longdouble#
+ * #suff = h, f, d, l#
  */
 
-#define _FMT1 "%%.%i" NPY_@NAME@_FMT
-#define _FMT2 "%%+.%i" NPY_@NAME@_FMT
-
-NPY_NO_EXPORT void
-format_@name@(char *buf, size_t buflen, @type@ val, unsigned int prec)
+NPY_NO_EXPORT PyObject *
+format_@name@(@type@ val, npy_bool scientific,
+              int precision, int sign, TrimMode trim,
+              int pad_left, int pad_right, int exp_digits)
 {
-    /* XXX: Find a correct size here for format string */
-    char format[64], *res;
-    size_t i, cnt;
-
-    PyOS_snprintf(format, sizeof(format), _FMT1, prec);
-    res = NumPyOS_ascii_format@suff@(buf, buflen, format, val, 0);
-    if (res == NULL) {
-        fprintf(stderr, "Error while formatting\n");
-        return;
-    }
-
-    /* If nothing but digits after sign, append ".0" */
-    cnt = strlen(buf);
-    for (i = (buf[0] == '-') ? 1 : 0; i < cnt; ++i) {
-        if (!isdigit(Py_CHARMASK(buf[i]))) {
-            break;
-        }
-    }
-    if (i == cnt && buflen >= cnt + 3) {
-        strcpy(&buf[cnt],".0");
-    }
-}
-
-#undef _FMT1
-#undef _FMT2
-
-/**end repeat**/
-
-/**begin repeat
- * #name = cfloat, cdouble, clongdouble#
- * #NAME = FLOAT, DOUBLE, LONGDOUBLE#
- * #type = npy_cfloat, npy_cdouble, npy_clongdouble#
- * #suff = f, d, l#
- */
-
-#define _FMT1 "%%.%i" NPY_@NAME@_FMT
-#define _FMT2 "%%+.%i" NPY_@NAME@_FMT
-
-static void
-format_@name@(char *buf, size_t buflen, @type@ val, unsigned int prec)
-{
-    /* XXX: Find a correct size here for format string */
-    char format[64];
-    char *res;
-
-    /*
-     * Ideally, we should handle this nan/inf stuff in NumpyOS_ascii_format*
-     */
-    if (val.real == 0.0 && npy_signbit(val.real) == 0) {
-        PyOS_snprintf(format, sizeof(format), _FMT1, prec);
-        res = NumPyOS_ascii_format@suff@(buf, buflen - 1, format, val.imag, 0);
-        if (res == NULL) {
-            /* FIXME
-             * We need a better way to handle the error message
-             */
-            fprintf(stderr, "Error while formatting\n");
-            return;
-        }
-        if (!npy_isfinite(val.imag)) {
-            strncat(buf, "*", 1);
-        }
-        strncat(buf, "j", 1);
+    if (scientific) {
+        return Dragon4_Scientific_AnySize(&val, sizeof(@type@), 1, precision,
+                                          sign, trim, pad_left, exp_digits);
     }
     else {
-        char re[64], im[64];
-        if (npy_isfinite(val.real)) {
-                PyOS_snprintf(format, sizeof(format), _FMT1, prec);
-                res = NumPyOS_ascii_format@suff@(re, sizeof(re), format,
-                        val.real, 0);
-                if (res == NULL) {
-                    /* FIXME
-                     * We need a better way to handle the error message
-                     */
-                    fprintf(stderr, "Error while formatting\n");
-                    return;
-                }
-        }
-        else {
-                if (npy_isnan(val.real)) {
-                        strcpy(re, "nan");
-                }
-                else if (val.real > 0){
-                        strcpy(re, "inf");
-                }
-                else {
-                        strcpy(re, "-inf");
-                }
-        }
-
-
-        if (npy_isfinite(val.imag)) {
-                PyOS_snprintf(format, sizeof(format), _FMT2, prec);
-                res = NumPyOS_ascii_format@suff@(im, sizeof(im), format,
-                        val.imag, 0);
-                if (res == NULL) {
-                    fprintf(stderr, "Error while formatting\n");
-                    return;
-                }
-        }
-        else {
-                if (npy_isnan(val.imag)) {
-                        strcpy(im, "+nan");
-                }
-                else if (val.imag > 0){
-                        strcpy(im, "+inf");
-                }
-                else {
-                        strcpy(im, "-inf");
-                }
-                if (!npy_isfinite(val.imag)) {
-                        strncat(im, "*", 1);
-                }
-        }
-        PyOS_snprintf(buf, buflen, "(%s%sj)", re, im);
+        return Dragon4_Positional_AnySize(&val, sizeof(@type@), 1, precision,
+                                          sign, trim, pad_left, pad_right);
     }
 }
 
-#undef _FMT1
-#undef _FMT2
-
 /**end repeat**/
 
-NPY_NO_EXPORT void
-format_half(char *buf, size_t buflen, npy_half val, unsigned int prec)
-{
-    format_float(buf, buflen, npy_half_to_float(val), prec);
-}
-
 /*
  * over-ride repr and str of array-scalar strings and unicode to
  * remove NULL bytes and then call the corresponding functions
@@ -828,21 +712,6 @@ timedeltatype_str(PyObject *self)
     return ret;
 }
 
-/* The REPR values are finfo.precision + 2 */
-#define HALFPREC_REPR 5
-#define HALFPREC_STR 5
-#define FLOATPREC_REPR 8
-#define FLOATPREC_STR 6
-#define DOUBLEPREC_REPR 17
-#define DOUBLEPREC_STR 12
-#if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE
-#define LONGDOUBLEPREC_REPR DOUBLEPREC_REPR
-#define LONGDOUBLEPREC_STR DOUBLEPREC_STR
-#else /* More than probably needed on Intel FP */
-#define LONGDOUBLEPREC_REPR 20
-#define LONGDOUBLEPREC_STR 12
-#endif
-
 /*
  * float type str and repr
  *
@@ -850,85 +719,100 @@ timedeltatype_str(PyObject *self)
  */
 
 /**begin repeat
- * #name = half, float, double, longdouble#
- * #Name = Half, Float, Double, LongDouble#
- * #NAME = HALF, FLOAT, DOUBLE, LONGDOUBLE#
- * #hascomplex = 0, 1, 1, 1#
+ * #kind = str, repr#
  */
+
 /**begin repeat1
- * #kind = str, repr#
- * #KIND = STR, REPR#
+ * #name = float, double, longdouble#
+ * #Name = Float, Double, LongDouble#
+ * #NAME = FLOAT, DOUBLE, LONGDOUBLE#
  */
 
-#define PREC @NAME@PREC_@KIND@
+static PyObject *
+@name@type_@kind@_either(npy_@name@ val, TrimMode trim_pos, TrimMode trim_sci,
+                         npy_bool sign)
+{
+    if (val < (npy_@name@)1.e16L) {
+        return format_@name@(val, 0, -1, sign, trim_pos, -1, -1, -1);
+    }
+    return format_@name@(val, 1, -1, sign, trim_sci, -1, -1, -1);
+}
 
 static PyObject *
 @name@type_@kind@(PyObject *self)
 {
-    char buf[100];
-    npy_@name@ val = ((Py@Name@ScalarObject *)self)->obval;
-
-    format_@name@(buf, sizeof(buf), val, PREC);
-    return PyUString_FromString(buf);
+    return @name@type_@kind@_either(((Py@Name@ScalarObject *)self)->obval,
+                                  TrimMode_LeaveOneZero, TrimMode_DptZeros, 0);
 }
 
-#if @hascomplex@
 static PyObject *
 c@name@type_@kind@(PyObject *self)
 {
-    char buf[202];
+    PyObject *rstr, *istr, *ret;
     npy_c@name@ val = ((PyC@Name@ScalarObject *)self)->obval;
+    TrimMode trim = TrimMode_DptZeros;
+
+    if (val.real == 0.0 && npy_signbit(val.real) == 0) {
+        istr = @name@type_@kind@_either(val.imag, trim, trim, 0);
+        if (istr == NULL) {
+            return NULL;
+        }
+        PyUString_ConcatAndDel(&istr, PyUString_FromString("j"));
+        return istr;
+    }
+
+    if (npy_isfinite(val.real)) {
+        rstr = @name@type_@kind@_either(val.real, trim, trim, 0);
+        if (rstr == NULL) {
+            return NULL;
+        }
+    }
+    else if (npy_isnan(val.real)) {
+        rstr = PyUString_FromString("nan");
+    }
+    else if (val.real > 0){
+        rstr = PyUString_FromString("inf");
+    }
+    else {
+        rstr = PyUString_FromString("-inf");
+    }
+
+    if (npy_isfinite(val.imag)) {
+        istr = @name@type_@kind@_either(val.imag, trim, trim, 1);
+        if (istr == NULL) {
+            return NULL;
+        }
+    }
+    else if (npy_isnan(val.imag)) {
+        istr = PyUString_FromString("+nan");
+    }
+    else if (val.imag > 0){
+        istr = PyUString_FromString("+inf");
+    }
+    else {
+        istr = PyUString_FromString("-inf");
+    }
 
-    format_c@name@(buf, sizeof(buf), val, PREC);
-    return PyUString_FromString(buf);
+    ret = PyUString_FromString("(");
+    PyUString_ConcatAndDel(&ret, rstr);
+    PyUString_ConcatAndDel(&ret, istr);
+    PyUString_ConcatAndDel(&ret, PyUString_FromString("j)"));
+    return ret;
 }
-#endif
 
 #undef PREC
 
 /**end repeat1**/
-/**end repeat**/
-
-/*
- * float type print (control print a, where a is a float type instance)
- */
-/**begin repeat
- * #name = half, float, double, longdouble#
- * #Name = Half, Float, Double, LongDouble#
- * #NAME = HALF, FLOAT, DOUBLE, LONGDOUBLE#
- * #hascomplex = 0, 1, 1, 1#
- */
-
-static int
-@name@type_print(PyObject *v, FILE *fp, int flags)
-{
-        char buf[100];
-        npy_@name@ val = ((Py@Name@ScalarObject *)v)->obval;
-
-        format_@name@(buf, sizeof(buf), val,
-                      (flags & Py_PRINT_RAW) ? @NAME@PREC_STR : @NAME@PREC_REPR);
-        Py_BEGIN_ALLOW_THREADS
-        fputs(buf, fp);
-        Py_END_ALLOW_THREADS
-        return 0;
-}
 
-#if @hascomplex@
-static int
-c@name@type_print(PyObject *v, FILE *fp, int flags)
+static PyObject *
+halftype_@kind@(PyObject *self)
 {
-        /* Size of buf: twice sizeof(real) + 2 (for the parenthesis) */
-        char buf[202];
-        npy_c@name@ val = ((PyC@Name@ScalarObject *)v)->obval;
-
-        format_c@name@(buf, sizeof(buf), val,
-                       (flags & Py_PRINT_RAW) ? @NAME@PREC_STR : @NAME@PREC_REPR);
-        Py_BEGIN_ALLOW_THREADS
-        fputs(buf, fp);
-        Py_END_ALLOW_THREADS
-        return 0;
+    npy_half val = ((PyHalfScalarObject *)self)->obval;
+    if (npy_half_to_double(val) < 1.e16) {
+        return format_half(val, 0, -1, 0, TrimMode_LeaveOneZero, -1, -1, -1);
+    }
+    return format_half(val, 1, -1, 0, TrimMode_DptZeros, -1, -1, -1);
 }
-#endif
 
 /**end repeat**/
 
@@ -4227,15 +4111,6 @@ initialize_numeric_types(void)
     /**end repeat**/
 
 
-    PyHalfArrType_Type.tp_print = halftype_print;
-    PyFloatArrType_Type.tp_print = floattype_print;
-    PyDoubleArrType_Type.tp_print = doubletype_print;
-    PyLongDoubleArrType_Type.tp_print = longdoubletype_print;
-
-    PyCFloatArrType_Type.tp_print = cfloattype_print;
-    PyCDoubleArrType_Type.tp_print = cdoubletype_print;
-    PyCLongDoubleArrType_Type.tp_print = clongdoubletype_print;
-
     /*
      * These need to be coded specially because getitem does not
      * return a normal Python type
diff --git a/numpy/core/src/multiarray/scalartypes.h b/numpy/core/src/multiarray/scalartypes.h
index b8d6cf83ee9a..83b188128aaf 100644
--- a/numpy/core/src/multiarray/scalartypes.h
+++ b/numpy/core/src/multiarray/scalartypes.h
@@ -19,9 +19,6 @@ initialize_casting_tables(void);
 NPY_NO_EXPORT void
 initialize_numeric_types(void);
 
-NPY_NO_EXPORT void
-format_longdouble(char *buf, size_t buflen, npy_longdouble val, unsigned int prec);
-
 #if PY_VERSION_HEX >= 0x03000000
 NPY_NO_EXPORT void
 gentype_struct_free(PyObject *ptr);
diff --git a/numpy/core/tests/test_arrayprint.py b/numpy/core/tests/test_arrayprint.py
index 26faabfb8855..196c68aa3aa3 100644
--- a/numpy/core/tests/test_arrayprint.py
+++ b/numpy/core/tests/test_arrayprint.py
@@ -72,42 +72,42 @@ def test_str(self):
         dtypes = [np.complex64, np.cdouble, np.clongdouble]
         actual = [str(np.array([c], dt)) for c in cvals for dt in dtypes]
         wanted = [
-            '[0.+0.j]',    '[0.+0.j]',    '[ 0.0+0.0j]',
-            '[0.+1.j]',    '[0.+1.j]',    '[ 0.0+1.0j]',
-            '[0.-1.j]',    '[0.-1.j]',    '[ 0.0-1.0j]',
-            '[0.+infj]',   '[0.+infj]',   '[ 0.0+infj]',
-            '[0.-infj]',   '[0.-infj]',   '[ 0.0-infj]',
-            '[0.+nanj]',   '[0.+nanj]',   '[ 0.0+nanj]',
-            '[1.+0.j]',    '[1.+0.j]',    '[ 1.0+0.0j]',
-            '[1.+1.j]',    '[1.+1.j]',    '[ 1.0+1.0j]',
-            '[1.-1.j]',    '[1.-1.j]',    '[ 1.0-1.0j]',
-            '[1.+infj]',   '[1.+infj]',   '[ 1.0+infj]',
-            '[1.-infj]',   '[1.-infj]',   '[ 1.0-infj]',
-            '[1.+nanj]',   '[1.+nanj]',   '[ 1.0+nanj]',
-            '[-1.+0.j]',    '[-1.+0.j]',    '[-1.0+0.0j]',
-            '[-1.+1.j]',    '[-1.+1.j]',    '[-1.0+1.0j]',
-            '[-1.-1.j]',    '[-1.-1.j]',    '[-1.0-1.0j]',
-            '[-1.+infj]',   '[-1.+infj]',   '[-1.0+infj]',
-            '[-1.-infj]',   '[-1.-infj]',   '[-1.0-infj]',
-            '[-1.+nanj]',   '[-1.+nanj]',   '[-1.0+nanj]',
-            '[inf+0.j]',   '[inf+0.j]',   '[ inf+0.0j]',
-            '[inf+1.j]',   '[inf+1.j]',   '[ inf+1.0j]',
-            '[inf-1.j]',   '[inf-1.j]',   '[ inf-1.0j]',
-            '[inf+infj]',  '[inf+infj]',  '[ inf+infj]',
-            '[inf-infj]',  '[inf-infj]',  '[ inf-infj]',
-            '[inf+nanj]',  '[inf+nanj]',  '[ inf+nanj]',
-            '[-inf+0.j]',   '[-inf+0.j]',   '[-inf+0.0j]',
-            '[-inf+1.j]',   '[-inf+1.j]',   '[-inf+1.0j]',
-            '[-inf-1.j]',   '[-inf-1.j]',   '[-inf-1.0j]',
-            '[-inf+infj]',  '[-inf+infj]',  '[-inf+infj]',
-            '[-inf-infj]',  '[-inf-infj]',  '[-inf-infj]',
-            '[-inf+nanj]',  '[-inf+nanj]',  '[-inf+nanj]',
-            '[nan+0.j]',   '[nan+0.j]',   '[ nan+0.0j]',
-            '[nan+1.j]',   '[nan+1.j]',   '[ nan+1.0j]',
-            '[nan-1.j]',   '[nan-1.j]',   '[ nan-1.0j]',
-            '[nan+infj]',  '[nan+infj]',  '[ nan+infj]',
-            '[nan-infj]',  '[nan-infj]',  '[ nan-infj]',
-            '[nan+nanj]',  '[nan+nanj]',  '[ nan+nanj]']
+            '[0.+0.j]',    '[0.+0.j]',    '[0.+0.j]',
+            '[0.+1.j]',    '[0.+1.j]',    '[0.+1.j]',
+            '[0.-1.j]',    '[0.-1.j]',    '[0.-1.j]',
+            '[0.+infj]',   '[0.+infj]',   '[0.+infj]',
+            '[0.-infj]',   '[0.-infj]',   '[0.-infj]',
+            '[0.+nanj]',   '[0.+nanj]',   '[0.+nanj]',
+            '[1.+0.j]',    '[1.+0.j]',    '[1.+0.j]',
+            '[1.+1.j]',    '[1.+1.j]',    '[1.+1.j]',
+            '[1.-1.j]',    '[1.-1.j]',    '[1.-1.j]',
+            '[1.+infj]',   '[1.+infj]',   '[1.+infj]',
+            '[1.-infj]',   '[1.-infj]',   '[1.-infj]',
+            '[1.+nanj]',   '[1.+nanj]',   '[1.+nanj]',
+            '[-1.+0.j]',   '[-1.+0.j]',   '[-1.+0.j]',
+            '[-1.+1.j]',   '[-1.+1.j]',   '[-1.+1.j]',
+            '[-1.-1.j]',   '[-1.-1.j]',   '[-1.-1.j]',
+            '[-1.+infj]',  '[-1.+infj]',  '[-1.+infj]',
+            '[-1.-infj]',  '[-1.-infj]',  '[-1.-infj]',
+            '[-1.+nanj]',  '[-1.+nanj]',  '[-1.+nanj]',
+            '[inf+0.j]',   '[inf+0.j]',   '[inf+0.j]',
+            '[inf+1.j]',   '[inf+1.j]',   '[inf+1.j]',
+            '[inf-1.j]',   '[inf-1.j]',   '[inf-1.j]',
+            '[inf+infj]',  '[inf+infj]',  '[inf+infj]',
+            '[inf-infj]',  '[inf-infj]',  '[inf-infj]',
+            '[inf+nanj]',  '[inf+nanj]',  '[inf+nanj]',
+            '[-inf+0.j]',  '[-inf+0.j]',  '[-inf+0.j]',
+            '[-inf+1.j]',  '[-inf+1.j]',  '[-inf+1.j]',
+            '[-inf-1.j]',  '[-inf-1.j]',  '[-inf-1.j]',
+            '[-inf+infj]', '[-inf+infj]', '[-inf+infj]',
+            '[-inf-infj]', '[-inf-infj]', '[-inf-infj]',
+            '[-inf+nanj]', '[-inf+nanj]', '[-inf+nanj]',
+            '[nan+0.j]',   '[nan+0.j]',   '[nan+0.j]',
+            '[nan+1.j]',   '[nan+1.j]',   '[nan+1.j]',
+            '[nan-1.j]',   '[nan-1.j]',   '[nan-1.j]',
+            '[nan+infj]',  '[nan+infj]',  '[nan+infj]',
+            '[nan-infj]',  '[nan-infj]',  '[nan-infj]',
+            '[nan+nanj]',  '[nan+nanj]',  '[nan+nanj]']
 
         for res, val in zip(actual, wanted):
             assert_equal(res, val)
@@ -290,22 +290,23 @@ def test_sign_spacing(self):
 
         assert_equal(repr(a), 'array([0., 1., 2., 3.])')
         assert_equal(repr(np.array(1.)), 'array(1.)')
-        assert_equal(repr(b), 'array([1.23400000e+09])')
+        assert_equal(repr(b), 'array([1.234e+09])')
 
         np.set_printoptions(sign=' ')
         assert_equal(repr(a), 'array([ 0.,  1.,  2.,  3.])')
         assert_equal(repr(np.array(1.)), 'array( 1.)')
-        assert_equal(repr(b), 'array([ 1.23400000e+09])')
+        assert_equal(repr(b), 'array([ 1.234e+09])')
 
         np.set_printoptions(sign='+')
         assert_equal(repr(a), 'array([+0., +1., +2., +3.])')
         assert_equal(repr(np.array(1.)), 'array(+1.)')
-        assert_equal(repr(b), 'array([+1.23400000e+09])')
+        assert_equal(repr(b), 'array([+1.234e+09])')
 
         np.set_printoptions(sign='legacy')
         assert_equal(repr(a), 'array([ 0.,  1.,  2.,  3.])')
         assert_equal(repr(np.array(1.)), 'array(1.)')
-        assert_equal(repr(b), 'array([  1.23400000e+09])')
+        assert_equal(repr(b),  'array([  1.23400000e+09])')
+        assert_equal(repr(-b), 'array([ -1.23400000e+09])')
 
     def test_sign_spacing_structured(self):
         a = np.ones(2, dtype='f,f')
@@ -313,6 +314,86 @@ def test_sign_spacing_structured(self):
                               "      dtype=[('f0', '<f4'), ('f1', '<f4')])")
         assert_equal(repr(a[0]), "(1., 1.)")
 
+    def test_floatmode(self):
+        x = np.array([0.6104, 0.922, 0.457, 0.0906, 0.3733, 0.007244,
+                      0.5933, 0.947, 0.2383, 0.4226], dtype=np.float16)
+        y = np.array([0.2918820979355541, 0.5064172631089138,
+                      0.2848750619642916, 0.4342965294660567,
+                      0.7326538397312751, 0.3459503329096204,
+                      0.0862072768214508, 0.39112753029631175],
+                      dtype=np.float64)
+        z = np.arange(6, dtype=np.float16)/10
+
+        # also make sure 1e23 is right (is between two fp numbers)
+        w = np.array(['1e{}'.format(i) for i in range(25)], dtype=np.float64)
+        # note: we construct w from the strings `1eXX` instead of doing
+        # `10.**arange(24)` because it turns out the two are not equivalent in
+        # python. On some architectures `1e23 != 10.**23`.
+        wp = np.array([1.234e1, 1e2, 1e123])
+
+        # unique mode
+        np.set_printoptions(floatmode='unique')
+        assert_equal(repr(x),
+    "array([0.6104  , 0.922   , 0.457   , 0.0906  , 0.3733  , 0.007244,\n"
+    "       0.5933  , 0.947   , 0.2383  , 0.4226  ], dtype=float16)")
+        assert_equal(repr(y),
+    "array([0.2918820979355541 , 0.5064172631089138 , 0.2848750619642916 ,\n"
+    "       0.4342965294660567 , 0.7326538397312751 , 0.3459503329096204 ,\n"
+    "       0.0862072768214508 , 0.39112753029631175])")
+        assert_equal(repr(z),
+    "array([0. , 0.1, 0.2, 0.3, 0.4, 0.5], dtype=float16)")
+        assert_equal(repr(w),
+    "array([1.e+00, 1.e+01, 1.e+02, 1.e+03, 1.e+04, 1.e+05, 1.e+06, 1.e+07,\n"
+    "       1.e+08, 1.e+09, 1.e+10, 1.e+11, 1.e+12, 1.e+13, 1.e+14, 1.e+15,\n"
+    "       1.e+16, 1.e+17, 1.e+18, 1.e+19, 1.e+20, 1.e+21, 1.e+22, 1.e+23,\n"
+    "       1.e+24])")
+        assert_equal(repr(wp), "array([1.234e+001, 1.000e+002, 1.000e+123])")
+
+        # maxprec mode, precision=8
+        np.set_printoptions(floatmode='maxprec', precision=8)
+        assert_equal(repr(x),
+            "array([0.6104  , 0.922   , 0.457   , 0.0906  , 0.3733  , 0.007244,\n"
+            "       0.5933  , 0.947   , 0.2383  , 0.4226  ], dtype=float16)")
+        assert_equal(repr(y),
+            "array([0.2918821 , 0.50641726, 0.28487506, 0.43429653, 0.73265384,\n"
+            "       0.34595033, 0.08620728, 0.39112753])")
+        assert_equal(repr(z),
+            "array([0. , 0.1, 0.2, 0.3, 0.4, 0.5], dtype=float16)")
+        assert_equal(repr(w[::5]),
+            "array([1.e+00, 1.e+05, 1.e+10, 1.e+15, 1.e+20])")
+        assert_equal(repr(wp), "array([1.234e+001, 1.000e+002, 1.000e+123])")
+
+        # fixed mode, precision=4
+        np.set_printoptions(floatmode='fixed', precision=4)
+        assert_equal(repr(x),
+    "array([0.6104, 0.9219, 0.4570, 0.0906, 0.3733, 0.0072, 0.5933, 0.9468,\n"
+    "       0.2383, 0.4226], dtype=float16)")
+        assert_equal(repr(y),
+    "array([0.2919, 0.5064, 0.2849, 0.4343, 0.7327, 0.3460, 0.0862, 0.3911])")
+        assert_equal(repr(z),
+    "array([0.0000, 0.1000, 0.2000, 0.3000, 0.3999, 0.5000], dtype=float16)")
+        assert_equal(repr(w[::5]),
+    "array([1.0000e+00, 1.0000e+05, 1.0000e+10, 1.0000e+15, 1.0000e+20])")
+        assert_equal(repr(wp), "array([1.2340e+001, 1.0000e+002, 1.0000e+123])")
+        # for larger precision, representation error becomes more apparent:
+        np.set_printoptions(floatmode='fixed', precision=8)
+        assert_equal(repr(z),
+    "array([0.00000000, 0.09997559, 0.19995117, 0.30004883, 0.39990234,\n"
+    "       0.50000000], dtype=float16)")
+
+        # maxprec_equal  mode, precision=8
+        np.set_printoptions(floatmode='maxprec_equal', precision=8)
+        assert_equal(repr(x),
+    "array([0.610352, 0.921875, 0.457031, 0.090576, 0.373291, 0.007244,\n"
+    "       0.593262, 0.946777, 0.238281, 0.422607], dtype=float16)")
+        assert_equal(repr(y),
+    "array([0.29188210, 0.50641726, 0.28487506, 0.43429653, 0.73265384,\n"
+    "       0.34595033, 0.08620728, 0.39112753])")
+        assert_equal(repr(z),
+            "array([0.0, 0.1, 0.2, 0.3, 0.4, 0.5], dtype=float16)")
+        assert_equal(repr(w[::5]),
+            "array([1.e+00, 1.e+05, 1.e+10, 1.e+15, 1.e+20])")
+        assert_equal(repr(wp), "array([1.234e+001, 1.000e+002, 1.000e+123])")
 
 def test_unicode_object_array():
     import sys
diff --git a/numpy/core/tests/test_getlimits.py b/numpy/core/tests/test_getlimits.py
index 455f5257c93b..361246c7ff69 100644
--- a/numpy/core/tests/test_getlimits.py
+++ b/numpy/core/tests/test_getlimits.py
@@ -80,7 +80,7 @@ def test_iinfo_repr(self):
         assert_equal(repr(np.iinfo(np.int16)), expected)
 
     def test_finfo_repr(self):
-        expected = "finfo(resolution=1e-06, min=-3.4028235e+38," + \
+        expected = "finfo(resolution=0.000001, min=-3.4028235e+38," + \
                    " max=3.4028235e+38, dtype=float32)"
         assert_equal(repr(np.finfo(np.float32)), expected)
 
diff --git a/numpy/core/tests/test_print.py b/numpy/core/tests/test_print.py
index 305258d6ff4c..4b5c5f81f037 100644
--- a/numpy/core/tests/test_print.py
+++ b/numpy/core/tests/test_print.py
@@ -23,12 +23,12 @@ def check_float_type(tp):
         assert_equal(str(tp(x)), str(float(x)),
                      err_msg='Failed str formatting for type %s' % tp)
 
-    if tp(1e10).itemsize > 4:
-        assert_equal(str(tp(1e10)), str(float('1e10')),
+    if tp(1e16).itemsize > 4:
+        assert_equal(str(tp(1e16)), str(float('1e16')),
                      err_msg='Failed str formatting for type %s' % tp)
     else:
-        ref = '1e+10'
-        assert_equal(str(tp(1e10)), ref,
+        ref = '1e+16'
+        assert_equal(str(tp(1e16)), ref,
                      err_msg='Failed str formatting for type %s' % tp)
 
 def test_float_types():
@@ -67,12 +67,12 @@ def check_complex_type(tp):
         assert_equal(str(tp(x + x*1j)), str(complex(x + x*1j)),
                      err_msg='Failed str formatting for type %s' % tp)
 
-    if tp(1e10).itemsize > 8:
-        assert_equal(str(tp(1e10)), str(complex(1e10)),
+    if tp(1e16).itemsize > 8:
+        assert_equal(str(tp(1e16)), str(complex(1e16)),
                      err_msg='Failed str formatting for type %s' % tp)
     else:
-        ref = '(1e+10+0j)'
-        assert_equal(str(tp(1e10)), ref,
+        ref = '(1e+16+0j)'
+        assert_equal(str(tp(1e16)), ref,
                      err_msg='Failed str formatting for type %s' % tp)
 
 def test_complex_types():
@@ -90,21 +90,21 @@ def test_complex_inf_nan():
     """Check inf/nan formatting of complex types."""
     TESTS = {
         complex(np.inf, 0): "(inf+0j)",
-        complex(0, np.inf): "inf*j",
+        complex(0, np.inf): "infj",
         complex(-np.inf, 0): "(-inf+0j)",
-        complex(0, -np.inf): "-inf*j",
+        complex(0, -np.inf): "-infj",
         complex(np.inf, 1): "(inf+1j)",
-        complex(1, np.inf): "(1+inf*j)",
+        complex(1, np.inf): "(1+infj)",
         complex(-np.inf, 1): "(-inf+1j)",
-        complex(1, -np.inf): "(1-inf*j)",
+        complex(1, -np.inf): "(1-infj)",
         complex(np.nan, 0): "(nan+0j)",
-        complex(0, np.nan): "nan*j",
+        complex(0, np.nan): "nanj",
         complex(-np.nan, 0): "(nan+0j)",
-        complex(0, -np.nan): "nan*j",
+        complex(0, -np.nan): "nanj",
         complex(np.nan, 1): "(nan+1j)",
-        complex(1, np.nan): "(1+nan*j)",
+        complex(1, np.nan): "(1+nanj)",
         complex(-np.nan, 1): "(nan+1j)",
-        complex(1, -np.nan): "(1+nan*j)",
+        complex(1, -np.nan): "(1+nanj)",
     }
     for tp in [np.complex64, np.cdouble, np.clongdouble]:
         for c, s in TESTS.items():
@@ -139,11 +139,11 @@ def check_float_type_print(tp):
     for x in [np.inf, -np.inf, np.nan]:
         _test_redirected_print(float(x), tp, _REF[x])
 
-    if tp(1e10).itemsize > 4:
-        _test_redirected_print(float(1e10), tp)
+    if tp(1e16).itemsize > 4:
+        _test_redirected_print(float(1e16), tp)
     else:
-        ref = '1e+10'
-        _test_redirected_print(float(1e10), tp, ref)
+        ref = '1e+16'
+        _test_redirected_print(float(1e16), tp, ref)
 
 def check_complex_type_print(tp):
     # We do not create complex with inf/nan directly because the feature is
@@ -151,11 +151,11 @@ def check_complex_type_print(tp):
     for x in [0, 1, -1, 1e20]:
         _test_redirected_print(complex(x), tp)
 
-    if tp(1e10).itemsize > 8:
-        _test_redirected_print(complex(1e10), tp)
+    if tp(1e16).itemsize > 8:
+        _test_redirected_print(complex(1e16), tp)
     else:
-        ref = '(1e+10+0j)'
-        _test_redirected_print(complex(1e10), tp, ref)
+        ref = '(1e+16+0j)'
+        _test_redirected_print(complex(1e16), tp, ref)
 
     _test_redirected_print(complex(np.inf, 1), tp, '(inf+1j)')
     _test_redirected_print(complex(-np.inf, 1), tp, '(-inf+1j)')
@@ -240,7 +240,7 @@ def test_locale_double():
 
 @in_foreign_locale
 def test_locale_longdouble():
-    assert_equal(str(np.longdouble(1.2)), str(float(1.2)))
+    assert_equal(str(np.longdouble('1.2')), str(float(1.2)))
 
 if __name__ == "__main__":
     run_module_suite()
diff --git a/numpy/core/tests/test_scalarprint.py b/numpy/core/tests/test_scalarprint.py
index 7e17e04252db..07a21014a25d 100644
--- a/numpy/core/tests/test_scalarprint.py
+++ b/numpy/core/tests/test_scalarprint.py
@@ -5,26 +5,193 @@
 from __future__ import division, absolute_import, print_function
 
 import numpy as np
-from numpy.testing import assert_, run_module_suite
+from numpy.testing import assert_, assert_equal, run_module_suite
 
 
 class TestRealScalars(object):
     def test_str(self):
         svals = [0.0, -0.0, 1, -1, np.inf, -np.inf, np.nan]
         styps = [np.float16, np.float32, np.float64, np.longdouble]
-        actual = [str(f(c)) for c in svals for f in styps]
         wanted = [
-             '0.0',  '0.0',  '0.0',  '0.0',
-             '-0.0', '-0.0', '-0.0', '-0.0',
-             '1.0',  '1.0',  '1.0',  '1.0',
-             '-1.0', '-1.0', '-1.0', '-1.0',
-             'inf',  'inf',  'inf',  'inf',
-             '-inf', '-inf', '-inf', '-inf',
-             'nan',  'nan',  'nan',  'nan']
+             ['0.0',  '0.0',  '0.0',  '0.0' ],
+             ['-0.0', '-0.0', '-0.0', '-0.0'],
+             ['1.0',  '1.0',  '1.0',  '1.0' ],
+             ['-1.0', '-1.0', '-1.0', '-1.0'],
+             ['inf',  'inf',  'inf',  'inf' ],
+             ['-inf', '-inf', '-inf', '-inf'],
+             ['nan',  'nan',  'nan',  'nan']]
 
-        for res, val in zip(actual, wanted):
-            assert_(res == val)
+        for wants, val in zip(wanted, svals):
+            for want, styp in zip(wants, styps):
+                msg = 'for str({}({}))'.format(np.dtype(styp).name, repr(val))
+                assert_equal(str(styp(val)), want, err_msg=msg)
 
+    def test_dragon4(self):
+        # these tests are adapted from Ryan Juckett's dragon4 implementation,
+        # see dragon4.c for details.
+
+        fpos32 = lambda x, **k: np.format_float_positional(np.float32(x), **k)
+        fsci32 = lambda x, **k: np.format_float_scientific(np.float32(x), **k)
+        fpos64 = lambda x, **k: np.format_float_positional(np.float64(x), **k)
+        fsci64 = lambda x, **k: np.format_float_scientific(np.float64(x), **k)
+
+        preckwd = lambda prec: {'unique': False, 'precision': prec}
+
+        assert_equal(fpos32('1.0'), "1.")
+        assert_equal(fsci32('1.0'), "1.e+00")
+        assert_equal(fpos32('10.234'), "10.234")
+        assert_equal(fpos32('-10.234'), "-10.234")
+        assert_equal(fsci32('10.234'), "1.0234e+01")
+        assert_equal(fsci32('-10.234'), "-1.0234e+01")
+        assert_equal(fpos32('1000.0'), "1000.")
+        assert_equal(fpos32('1.0', precision=0), "1.")
+        assert_equal(fsci32('1.0', precision=0), "1.e+00")
+        assert_equal(fpos32('10.234', precision=0), "10.")
+        assert_equal(fpos32('-10.234', precision=0), "-10.")
+        assert_equal(fsci32('10.234', precision=0), "1.e+01")
+        assert_equal(fsci32('-10.234', precision=0), "-1.e+01")
+        assert_equal(fpos32('10.234', precision=2), "10.23")
+        assert_equal(fsci32('-10.234', precision=2), "-1.02e+01")
+        assert_equal(fsci64('9.9999999999999995e-08', **preckwd(16)),
+                            '9.9999999999999995e-08')
+        assert_equal(fsci64('9.8813129168249309e-324', **preckwd(16)),
+                            '9.8813129168249309e-324')
+        assert_equal(fsci64('9.9999999999999694e-311', **preckwd(16)),
+                            '9.9999999999999694e-311')
+
+
+	# test rounding
+        # 3.1415927410 is closest float32 to np.pi
+        assert_equal(fpos32('3.14159265358979323846', **preckwd(10)),
+                            "3.1415927410")
+        assert_equal(fsci32('3.14159265358979323846', **preckwd(10)),
+                            "3.1415927410e+00")
+        assert_equal(fpos64('3.14159265358979323846', **preckwd(10)),
+                            "3.1415926536")
+        assert_equal(fsci64('3.14159265358979323846', **preckwd(10)),
+                            "3.1415926536e+00")
+        # 299792448 is closest float32 to 299792458
+        assert_equal(fpos32('299792458.0', **preckwd(5)), "299792448.00000")
+        assert_equal(fsci32('299792458.0', **preckwd(5)), "2.99792e+08")
+        assert_equal(fpos64('299792458.0', **preckwd(5)), "299792458.00000")
+        assert_equal(fsci64('299792458.0', **preckwd(5)), "2.99792e+08")
+
+        assert_equal(fpos32('3.14159265358979323846', **preckwd(25)),
+                            "3.1415927410125732421875000")
+        assert_equal(fpos64('3.14159265358979323846', **preckwd(50)),
+                         "3.14159265358979311599796346854418516159057617187500")
+        assert_equal(fpos64('3.14159265358979323846'), "3.141592653589793")
+
+
+	# smallest numbers
+        assert_equal(fpos32(0.5**(126 + 23), unique=False, precision=149),
+                    "0.00000000000000000000000000000000000000000000140129846432"
+                    "4817070923729583289916131280261941876515771757068283889791"
+                    "08268586060148663818836212158203125")
+        assert_equal(fpos64(0.5**(1022 + 52), unique=False, precision=1074),
+                    "0.00000000000000000000000000000000000000000000000000000000"
+                    "0000000000000000000000000000000000000000000000000000000000"
+                    "0000000000000000000000000000000000000000000000000000000000"
+                    "0000000000000000000000000000000000000000000000000000000000"
+                    "0000000000000000000000000000000000000000000000000000000000"
+                    "0000000000000000000000000000000000049406564584124654417656"
+                    "8792868221372365059802614324764425585682500675507270208751"
+                    "8652998363616359923797965646954457177309266567103559397963"
+                    "9877479601078187812630071319031140452784581716784898210368"
+                    "8718636056998730723050006387409153564984387312473397273169"
+                    "6151400317153853980741262385655911710266585566867681870395"
+                    "6031062493194527159149245532930545654440112748012970999954"
+                    "1931989409080416563324524757147869014726780159355238611550"
+                    "1348035264934720193790268107107491703332226844753335720832"
+                    "4319360923828934583680601060115061698097530783422773183292"
+                    "4790498252473077637592724787465608477820373446969953364701"
+                    "7972677717585125660551199131504891101451037862738167250955"
+                    "8373897335989936648099411642057026370902792427675445652290"
+                    "87538682506419718265533447265625")
+
+	# largest numbers
+        assert_equal(fpos32(np.finfo(np.float32).max, **preckwd(0)),
+	            "340282346638528859811704183484516925440.")
+        assert_equal(fpos64(np.finfo(np.float64).max, **preckwd(0)),
+                    "1797693134862315708145274237317043567980705675258449965989"
+                    "1747680315726078002853876058955863276687817154045895351438"
+                    "2464234321326889464182768467546703537516986049910576551282"
+                    "0762454900903893289440758685084551339423045832369032229481"
+                    "6580855933212334827479782620414472316873817718091929988125"
+                    "0404026184124858368.")
+        # Warning: In unique mode only the integer digits necessary for
+        # uniqueness are computed, the rest are 0. Should we change this?
+        assert_equal(fpos32(np.finfo(np.float32).max, precision=0),
+	            "340282350000000000000000000000000000000.")
+
+	# test trailing zeros
+        assert_equal(fpos32('1.0', unique=False, precision=3), "1.000")
+        assert_equal(fpos64('1.0', unique=False, precision=3), "1.000")
+        assert_equal(fsci32('1.0', unique=False, precision=3), "1.000e+00")
+        assert_equal(fsci64('1.0', unique=False, precision=3), "1.000e+00")
+        assert_equal(fpos32('1.5', unique=False, precision=3), "1.500")
+        assert_equal(fpos64('1.5', unique=False, precision=3), "1.500")
+        assert_equal(fsci32('1.5', unique=False, precision=3), "1.500e+00")
+        assert_equal(fsci64('1.5', unique=False, precision=3), "1.500e+00")
+
+    def test_dragon4_interface(self):
+        tps = [np.float16, np.float32, np.float64]
+        if hasattr(np, 'float128'):
+            tps.append(np.float128)
+
+        fpos = np.format_float_positional
+        fsci = np.format_float_scientific
+
+        for tp in tps:
+            # test padding
+            assert_equal(fpos(tp('1.0'), pad_left=4, pad_right=4), "   1.    ")
+            assert_equal(fpos(tp('-1.0'), pad_left=4, pad_right=4), "  -1.    ")
+            assert_equal(fpos(tp('-10.2'),
+                         pad_left=4, pad_right=4), " -10.2   ")
+
+            # test exp_digits
+            assert_equal(fsci(tp('1.23e1'), exp_digits=5), "1.23e+00001")
+
+            # test fixed (non-unique) mode
+            assert_equal(fpos(tp('1.0'), unique=False, precision=4), "1.0000")
+            assert_equal(fsci(tp('1.0'), unique=False, precision=4),
+                         "1.0000e+00")
+
+            # test trimming
+            # trim of 'k' or '.' only affects non-unique mode, since unique
+            # mode will not output trailing 0s.
+            assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='k'),
+                         "1.0000")
+
+            assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='.'),
+                         "1.")
+            assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='.'),
+                         "1.2" if tp != np.float16 else "1.2002")
+
+            assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='0'),
+                         "1.0")
+            assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='0'),
+                         "1.2" if tp != np.float16 else "1.2002")
+            assert_equal(fpos(tp('1.'), trim='0'), "1.0")
+
+            assert_equal(fpos(tp('1.'), unique=False, precision=4, trim='-'),
+                         "1")
+            assert_equal(fpos(tp('1.2'), unique=False, precision=4, trim='-'),
+                         "1.2" if tp != np.float16 else "1.2002")
+            assert_equal(fpos(tp('1.'), trim='-'), "1")
+
+    def float32_roundtrip(self):
+        # gh-9360
+        x = np.float32(1024 - 2**-14)
+        y = np.float32(1024 - 2**-13)
+        assert_(repr(x) != repr(y))
+        assert_equal(np.float32(repr(x)), x)
+        assert_equal(np.float32(repr(y)), y)
+
+    def float64_vs_python(self):
+        # gh-2643, gh-6136, gh-6908
+        assert_equal(repr(np.float64(0.1)), repr(0.1))
+        assert_(repr(np.float64(0.20000000000000004)) != repr(0.2))
 
 if __name__ == "__main__":
     run_module_suite()