2012-08-02

How to format a double with 5 digits of precision in pure Java

This blog post shows pure Java code to format a double with 5 digits of precision, i.e. in non-scientific notation, rounded to at most 5 digits after the decimal point; the part in front of the decimal point can be arbitrarily long.

Java's built-in NumberFormat class can be used (see its invocation in the check method below); however, that class is not available in all JVMs (e.g. Avian). Another option is to convert the double to a string (e.g. "" + d or Double.toString(d) or String.valueOf(d)), and manually analyze the string to convert the scientific notation (e.g. 123.456e78) to decimal notation (see the numberFormat5assumes method below). However, in some JVMs (e.g. Avian) Double.toString(d) returns at most 5 digits in total, so it loses a lot of precision. To work around this, we can convert the double to a long (dividing large doubles first so that they fit), convert the long to a String, and add the decimal point manually (see the numberFormat5 method in the code below). This last solution is inaccurate for large doubles (because of the divisions by powers of 10 involved).

For a more precise, but much more complicated implementation, see dtoa.java; more details can be found on this StackOverflow page.

import java.text.NumberFormat;
import java.util.Locale;

/**
 * Formats doubles in plain (non-scientific) decimal notation with at most 5
 * digits after the decimal point and no trailing zeros, without relying on
 * {@link NumberFormat} in the formatting methods themselves.
 *
 * <p>{@link #numberFormat5assumes(double)} post-processes the output of
 * {@code "" + d}; {@link #numberFormat5(double)} goes through a
 * {@code (long)} conversion instead. {@link #check(double)} and
 * {@link #main(String[])} compare {@code numberFormat5} against
 * {@code NumberFormat}.
 */
public class nf {
  /**
   * Formats {@code d} by rewriting the string produced by {@code "" + d}.
   *
   * <p>This implementation assumes that Double.toString returns the most
   * accurate possible strings.
   *
   * @param d the value to format
   * @return {@code "\ufffd"} for NaN, {@code "\u221e"} / {@code "-\u221e"} for
   *     the infinities, otherwise the decimal representation of {@code d}
   *     rounded to at most 5 fractional digits, with trailing fractional zeros
   *     (and a then-dangling decimal point) removed
   * @throws RuntimeException if the string form of {@code d} contains an
   *     unexpected character
   */
  public static String numberFormat5assumes(double d) {
    if (Double.isNaN(d)) return "\ufffd";
    if (Double.isInfinite(d)) return d < 0 ? "-\u221e" : "\u221e";
    if (-1 < d && d < 1) {
      boolean isNegative = d < 0 || (d == 0 && "-0.0".equals("" + d));
      if (isNegative) d = -d;
      // d scaled to an integer count of 1e-5 units, rounded half up.
      int scaled = (int)(.5 + d * 100000);
      if (scaled >= 100000) {
        // The rounding carried into the integer part (e.g. 0.999999). Without
        // this check 100000 + scaled would be 200000, whose leading digit gets
        // stripped below, yielding "0" instead of "1".
        return isNegative ? "-1" : "1";
      }
      // Adding 100000 and dropping the leading '1' left-pads scaled with
      // zeros to exactly 5 digits.
      String sa = "0." + ((100000 + scaled) + "").substring(1);
      int i = sa.length();
      while (i > 0 && sa.charAt(i - 1) == '0') {
        --i;
      }
      if (i == 2) i = 1;  // "0." -> "0".
      sa = sa.substring(0, i);
      return isNegative ? "-" + sa : sa;
    } else {
      String s = "" + d;
      char c;
      // Scan backwards for the exponent marker, rejecting unexpected
      // characters on the way.
      int i = s.length() - 1;
      while (i > 0 && (c = s.charAt(i - 1)) != 'e' && c != 'E') {
        if ((c < '0' || c > '9') && c != '.' && c != '-') {
          throw new RuntimeException("Bad double: " + s);
        }
        --i;
      }
      int j = i;
      int e = 0;  // Decimal exponent; stays 0 if s has no exponent part.
      if (i > 0) {
        // s.charAt(i - 1) is the exponent marker; parse the digits after it.
        // NOTE(review): only unsigned exponents are parsed; presumably fine
        // because abs(d) >= 1 in this branch -- confirm for the target JVM.
        while (j < s.length()) {
          e = 10 * e + s.charAt(j++) - '0';
        }
        --i;  // i is now the index of the exponent marker (end of mantissa).
      } else {
        i = s.length();  // No exponent: the whole string is the mantissa.
      }
      // Output buffer: the mantissa characters plus up to e padding zeros.
      char o[] = new char[s.length() + e];
      j = 0;
      int w = 0;  // Number of characters written to o so far.
      if (s.charAt(0) == '-') {
        o[w++] = '-';
        ++j;
      }
      // Copy the mantissa digits without the '.'; t ends up as the index in s
      // just after the '.', so that j - t is the number of fractional digits.
      int t = j;
      while (j < i) {
        if ((c = s.charAt(j)) == '.') {
          t = j + 1;
        } else {
          o[w++] = c;
        }
        ++j;
      }
      if (j - t > e) {
        // More fractional digits than the exponent consumes: shift the
        // remaining (j - t - e) fractional digits right by one and put the
        // decimal point in front of them.
        i = w++;
        while (j - t > e) {
          o[i] = o[i - 1];
          --i;
          ++t;
        }
        o[i++] = '.';  // From here on i is the index of the first fractional digit.
        if (w - i > 5) {
          w = i + 5;  // Keep only 5 fractional digits.
          if (o[i + 5] >= '5') {  // Round up.  (Should be >5.)
            // Propagate the carry leftwards over trailing '9's.
            j = i + 5;
            while (j > 0) {
              --j;
              if (o[j] == '.') continue;
              if (o[j] == '-') break;
              if (o[j] != '9') { ++o[j]; j = -1; break; }
              o[j] = '0';
            }
            if (j == 0) {
              // Every digit was a '9' and is now '0': the number gains a
              // digit. Rewrite it as 1 followed by zeros, moving the decimal
              // point one position to the right.
              if (o[j] == '-') ++j;
              o[j++] = '1';
              while (j < w) {
                if (o[j] == '.') {
                  o[j++] = '0';
                  o[j++] = '.';
                  i = j;
                  break;
                } else {
                  o[j++] = '0';
                }
              }
              w = j;
            }
          }
        }
        // Drop trailing fractional zeros.
        while (w > i && o[w - 1] == '0') {
          --w;
        }
        if (w == i) {  // "." -> "".
          --w;
        }
      } else {
        // The exponent consumes all fractional digits: the result is an
        // integer, padded with zeros as needed.
        while (j - t < e) {
          o[w++] = '0';
          --t;
        }
      }
      return new String(o, 0, w);
    }
  }

  /**
   * Formats {@code d} by scaling it by 100000, converting to {@code long} and
   * inserting the decimal point manually.
   *
   * <p>This implementation doesn't use Double.toString for nonzero values
   * (it is consulted only to detect negative zero), but it uses the
   * `(long)aDouble' conversion. It's a bit less accurate (can be as few as
   * 16 correct digits out of 21) for very large doubles (abs(d)>=1e13).
   *
   * @param d the value to format
   * @return {@code "\ufffd"} for NaN, {@code "\u221e"} / {@code "-\u221e"} for
   *     the infinities, otherwise the decimal representation of {@code d}
   *     rounded (half up on the scaled value) to at most 5 fractional digits,
   *     with trailing fractional zeros removed
   */
  public static String numberFormat5(double d) {
    if (Double.isNaN(d)) return "\ufffd";
    if (Double.isInfinite(d)) return d < 0 ? "-\u221e" : "\u221e";
    // Negative zero compares equal to 0, so it is detected via its string
    // form; both "-0.0" and "-0" are accepted.
    boolean isNegative = d < 0 ||
        (d == 0 && ("-0.0".equals("" + d) || "-0".equals("" + d)));
    if (isNegative) d = -d;
    String s;
    if (d >= 10000000000000.0) {  // 13 zeros.
      // 9223372036854775807 == Long.MAX_VALUE.
      // 1000000000000000000 has 13+5 zeros.
      // TODO: Instead of 9.223e13, check for 9.223372036854775807e13.
      int c = 0;  // Number of decimal digits divided away from d.
      // These divisions below are a bit inaccurate, but doing them accurately
      // would need >1000 lines of code. Example inaccuracies:
      //
      // -5.555333333333333E20: the first 18 digits (out of 21) are correct.
      // 1.7976931348623157E308: the first 16 digits are correct.
      while (d >= 9.223e25)   { d /= 10000000000000.0; c += 13; }
      if (d >= 9.223e24)      { d /= 1000000000000.0; c += 12; }
      else if (d >= 9.223e23) { d /= 100000000000.0; c += 11; }
      else if (d >= 9.223e22) { d /= 10000000000.0; c += 10; }
      else if (d >= 9.223e21) { d /= 1000000000.0; c += 9; }
      else if (d >= 9.223e20) { d /= 100000000.0; c += 8; }
      else if (d >= 9.223e19) { d /= 10000000.0; c += 7; }
      else if (d >= 9.223e18) { d /= 1000000.0; c += 6; }
      else if (d >= 9.223e17) { d /= 100000.0; c += 5; }
      else if (d >= 9.223e16) { d /= 10000.0; c += 4; }
      else if (d >= 9.223e15) { d /= 1000.0; c += 3; }
      else if (d >= 9.223e14) { d /= 100.0; c += 2; }
      else if (d >= 9.223e13) { d /= 10.0; c += 1; }
      // The digits divided away are replaced by c zeros appended below.
      char cs[] = new char[c];
      while (c > 0) {
        cs[--c] = '0';
      }
      double e = d * 100000.0 + 0.5;
      s = (long)e + new String(cs);
    } else {
      // We have to introduce a temporary variable (e) here for i386 gcj-4.4
      // on Ubuntu Lucid (4.4.3-1ubuntu4.1), without optimization flags.
      // Without this temporary variable it would convert 0.834375 to 83437
      // instead of the correct 83438.
      //
      //   gcj-4.4 -o nf --main=nf nf.java && ./nf
      double e = d * 100000.0 + 0.5;
      s = (long)e + "";
    }
    // s now holds abs(d) * 100000 as a digit string: its last 5 characters
    // are the fractional digits, and j is the position of the decimal point.
    int i = s.length();
    int j = s.length() - 5;
    // Strip trailing zeros, but only within the fractional part.
    while (i > j && i > 0 && s.charAt(i - 1) == '0') {
      --i;
    }
    if (i == 0) {
      s = "0";
    } else if (i == j) {  // Found an integer.
      s = s.substring(0, j);
    } else if (j <= 0) {  // Found a number between 0 and 1.
      // s has fewer than 6 digits: prepend "0." plus the missing leading
      // fractional zeros (-j of them).
      s = "0.00000".substring(0, 2 - j) + s.substring(0, i);
    } else {
      s = s.substring(0, j) + "." + s.substring(j, i);
    }
    return isNegative ? "-" + s : s;
  }

  /**
   * Formats {@code d} with both {@link NumberFormat} and
   * {@link #numberFormat5(double)}, reports any mismatch on stderr, and prints
   * a {@code check2(...)} line containing the NumberFormat result to stdout.
   *
   * @param d the value to format and compare
   */
  public static void check(double d) {
    NumberFormat nf = NumberFormat.getInstance(Locale.US);
    nf.setMinimumFractionDigits(0);
    nf.setMaximumFractionDigits(5);
    nf.setGroupingUsed(false);
    String a = nf.format(d);
    String b = numberFormat5(d);
    if (!(a.equals(b))) {
      System.err.println(d + ": " + a + " != " + b);
    }
    System.out.println("    check2(" + d + ", \"" + a + "\");");
  }

  /**
   * Runs {@link #check(double)} on a fixed list of sample values.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    check(42.0);
    check(42.7);
    check(-42.7);
    check(-42.7654321);
    check(-555533333333333333342.7654321);  // numberFormat5 is inaccurate.
    check(Double.NaN);  // FYI gcj-4.4 NumberFormat emits "NaN", openjdk-6 emits "\ufffd".
    check(Double.MIN_VALUE);
    check(Double.MAX_VALUE);  // numberFormat5 is inaccurate.
    check(Double.NEGATIVE_INFINITY);
    check(Double.POSITIVE_INFINITY);
    check(-0.000000034);
    check(0.0);
    check(-0.0);  // FYI gcj-4.4 NumberFormat emits "0", openjdk-6 emits "-0".
    check(-0.7654321);
    check(-0.3456789);
    check(-0.34);
    check(-0.056);
    check(0.0078);
    check(123.456);
    check(-123.456);
    check(-123.456789);
    check(-123.450009);
    check(123.450005);  // NumberFormat is inaccurate: 123.45 != 123.45001.
    check(123.450006);
    check(123.499996);
    check(-123.450003);
    check(-99.999995);
    check(999.999995);
    check(-123.999999);
    check(-123.899999);
    check(0.834375);
    check(-0.834375);
  }
}

No comments: