;;;;
;;;; The :rational-to-string package provides the following methods
;;;; for converting between arbitrary precision floating point and
;;;; rational numbers:
;;;;
;;;;     (rational-to-string)  
;;;;     (string-to-rational)
;;;;     (parse-decimal-string)
;;;;     (normalize-rational)
;;;;     (approx-log10)
;;;;


(defpackage :rational-to-string
  (:use :cl)
  (:export #:rational-to-string
           #:string-to-rational
           #:parse-decimal-string
           #:normalize-rational
           #:ilog10
           #:approx-log10
           #:rational-to-string-version))           

(in-package :rational-to-string)

(defun rational-to-string-version ()
  "Return the version for the rational-to-string package."
  (values "1.0.5" 1 0 5))

(defun common-round (x)
  "Round the rational X to the nearest integer.  Ties are rounded to
the largest absolute value.  This is in contrast to the Common
Lisp (round) which rounds ties to the even integer."
  (if (minusp x)
    (ceiling  (- x (/ 1 2)))
    (floor (+ x (/ 1 2)))))

(defun parse-decimal-string (s)
  "This function parses the string S which should be the string
representation of an arbitrary precision floating point number in
the the following form:

    \"[+|-]<whole_part>.<fract_part>[e|E[+|-]<exp_part>]\"

This function returns

    (values <ok>
            <sign>
            <whole_part>
            <fract_part>
            <fract_part_digit_count>
            <exp_part>
            <remainder_of_s>)

where <ok> is T if a decimal string could be extracted from the front
of S or NIL otherwise.  If <ok> is T, the trailing part of S that did
not contribute to the decimal string is returned as the last value.

The <sign> value that is returned will be the symbol :+ or :-
depending on whether the sign of the number is positive or negative.
<whole_part>, <fract_part>, and <exp_part> will hold the whole part,
fractional part, and exponential part respectively.

The <fract_part_digit_count> is the number of decimal digits in
<fract_part>.  This allows you calculate the fractional part using
arbitrary precision rational arithmetic as follows:

    (/ <fract_part> (expt 10 <fract_part_digit_count>))"

  ;;
  ;; NOTE: The first version of this function used regular expressions
  ;; to dissect "s".  This worked fine for cl-ppcre, but I had some
  ;; problems with the regex engine in Emacs Lisp when I tried to port
  ;; this code.  So, I just rewrote this function using a family of
  ;; recursive functions to do the same job as the regex.
  ;;

  (labels

      ((is-white-space (c)
         "Return true if the character C is white space."
         (member c '(#\space #\tab #\return #\newline)))

       (skip-white-space (cs)
         "Remove the leading white space from the list of characters
CS and return the remaining list of characters."
         (cond ((or (null cs)
                    (not (is-white-space (car cs))))
                cs)
               (t (skip-white-space (cdr cs)))))

       (extract-sign (cs)
         "Extract the sign from the top of the list of characters CS
and return remaining list of characters and the sign as either the
symbol :+ or :-.  By default, if no sign is present, :+ is returned as
the sign."
         (cond ((null cs) (values cs :+))
               ((eql (car cs) #\+) (values (cdr cs) :+))
               ((eql (car cs) #\-) (values (cdr cs) :-))
               (t (values cs :+))))

       (extract-uinteger (cs &optional (acc nil) (acc-length 0))
         "Extract the unsigned integer from the top of the list of
characters CS and return the remaining list of characters and the
unsigned integer.  If an unsigned integer cannot be extracted, NIL is
returned in place of the unsigned integer value."
         (cond ((null cs)
                (values cs acc acc-length))
               ((or (char< (car cs) #\0)
                    (char> (car cs) #\9))
                (values cs acc acc-length))
               (t (extract-uinteger
                   (cdr cs)
                   ;; acc = acc * 10 + <digit>
                   (+ (* (if acc acc 0) 10)
                      (- (char-code (car cs))
                         (char-code #\0)))
                   (+ acc-length 1)))))

       (remove-decimal-point (cs)
         "Remove the decimal point from the top of the list of
characters CS and return the remaining list of characters."
         (cond ((null cs) cs)
               ((eql (car cs) #\.) (cdr cs))
               (t cs)))

       (extract-exponent (cs)
         "Remove the exponent from the top of the list of characters
CS and return the remaining list of characters.  For example, if
\"e-15\" is at the top of CS, all four characters will be removed from
CS and -15 will be returned as the exponent.  The exponent must be an
integral value."
         ;;
         ;; This requires a little bit of look-ahead because we don't
         ;; want to return "cs" as-is if we cannot find the exponent.
         ;; This means we cannot consume "e" or "e-" until we know
         ;; that an exponent exists on the other side of "e" or "e-".
         ;;
         (cond
           ;; Nothing to do if the string is empty.
           ((null cs)
            (values cs nil))
           ;; Nothing to do if the first character is not "e" or "E".
           ((and (not (eql (car cs) #\e))
                 (not (eql (car cs) #\E)))
            (values cs nil))
           ;; The first character is "e" or "E" so we now need to
           ;; handle the optional sign which can be either "+", "-",
           ;; or "".
           (t (let ((has-sign (or (eql (cadr cs) #\+)
                                  (eql (cadr cs) #\-)))
                    (is-negative (eql (cadr cs) #\-)))
                (multiple-value-bind (cs-new exponent)
                    ;; Fetch the exponent make sure to skip passed the
                    ;; sign as required by (extract-uinteger).
                    (extract-uinteger (if has-sign (cddr cs) (cdr cs)))
                  (if exponent
                      (values cs-new (if is-negative (- exponent) exponent))
                      (values cs nil))))))))

    ;;
    ;; Parse the string.
    ;;

    (let ((cs (skip-white-space (coerce s 'list))))
      (multiple-value-bind (cs sign)
          (extract-sign cs)
        (multiple-value-bind (cs whole-part)
            (extract-uinteger cs)
          (let ((cs (remove-decimal-point cs)))
            (multiple-value-bind (cs fract-part fract-part-digit-count)
                (extract-uinteger cs)
              (multiple-value-bind (cs exp-part)
                  (extract-exponent cs)
                ;; If there are still characters, in the list the
                ;; input string is malformed.  The input is also
                ;; malformed if both whole-part and fract-part could
                ;; not be found.  The input is ok if either one could
                ;; be found (e.g., "1", ".1").
                (let ((success (if (or whole-part fract-part) t nil)))
                  (values success
                          sign
                          (if whole-part whole-part 0)
                          (if fract-part fract-part 0)
                          fract-part-digit-count
                          (if exp-part exp-part 0)
                          (if success
                              (coerce cs 'string)
                              s)))))))))))

(defun ilog10 (n)
  "With some efficiency, return the (floor (log10 n)) if (log10 n) is
positive and the (ceil (log10 n)) if (log10 n) is negative.  This is
used by (normalize-rational) to get a good approximation of how far it
needs to shift in order to normalize N."
  (labels
      ((iter (n x x-old count count-old acc)
         (cond ((and (<= 1 n) (< n 10)) (+ count-old acc))
               ((< n 1) (- (ilog10 (/ n))))
               ((>= n x) (iter n (* x x) x (* 2 count) count acc))
               (t (iter (/ n x-old) 10 1 1 0 (+ count-old acc))))))
    (if (<= n 0)
        (error "ilog10: domain error: ~a" n)
        (iter n 10 1 1 0 0))))

(defun approx-log10 (n)
    "Return the approximate log (base-10) of the rational number N
such that (1 <= (abs (expt n (approx-log10 n))) < 10).  This is used
by (normalize-rational) to normalize N."
    (labels
        ;; Simple but very inefficient loop that increments or
        ;; decrements the exponent until n is normalized.
        ;; Fortunately, the approximation below that uses (ilog10) is
        ;; very good such that this loop should iterate only about twice.
        ((iter (x exponent)
           (let* ((y (expt 10 exponent))
                  (z (/ x y)))
             (cond ((>= z 10) (iter x (+ exponent 1)))
                   ((< z 1) (iter x (- exponent 1)))
                   (t exponent)))))
      (if (<= n 0)
          (error "approx-log10: domain error: n = ~a" n)
          (if (and (>= n 1) (< n 10))
              0
              ;; Get a very good approximation using (ilog10) and pass
              ;; it into (iter) which does a linear search from there.
              (iter n (ilog10 n))))))

(defun normalize-rational (n)
  "Normalize the rational number N by returning the multiple
values (values SIGNIFICAND EXPONENT) such that

    N = SIGNIFICAND * 10 ^ EXPONENT

where 1 <= SIGNIFICAND < 10."
  (if (zerop n)
      (values n 0)
      (let ((exponent (approx-log10 (abs n))))
        (values (* n (expt 10 (- exponent))) exponent))))

(defun rational-to-string (n
                           precision
                           &key
                             (trim t)
                             (sci nil)
                             (estr "e"))
  "Convert the rational number N to a string holding the decimal
representation of N having PRECISION digits after the decimal point.
If TRIM is true, trailing zeros in the fractional part will be
trimmed; otherwise, trailing zeros will be returned.  If SCI is true,
the result will be in scientific notation with the value of ESTR
inserted between the significand and the exponent."

  ;;
  ;; The point of this function is to get around a limitation in
  ;; either my knowledge of lisp or lisp itself.  I cannot find a
  ;; (format) specificiation that will print a _rational_ with an
  ;; arbitrary amount of precision.  Instead, (format) always prints a
  ;; rational in the form "numerator/denominator".
  ;;
  ;; On the other hand, it is trivial to print an _integer_ with an
  ;; arbitrary amount of precision.  So the algorithm below basically
  ;; shifts "precision" worth of digits from the rational number
  ;; across to the left side of the decimal point.  It then uses
  ;; (common-round) to round off to an integer.  It then inserts a
  ;; decimal point into the correct location of the resulting string.
  ;;

  (declare (type rational n))

  (labels

      ((add-zeros (s precision)
         "Add leading zeros to the scaled value held in the string S.
This is needed so that, when numbers like 0.001 are scaled (by
multiplying below by (expt 10 precision)), the significant zeros just
to the right of the decimal place are not lost. This is important
because we do the scaling as a cheap way of getting all of the digits
to the left-hand side of the decimal point where (princ-to-string) can
convert them to a string.  After inserting all the digits into what we
are calling the scaled string S, it is still necessary to go back into
S and insert the decimal point which requires all significant zeros to
be present."

         (labels
             ((make-zeros (n &optional (acc nil))
                "Prepend N zero characters to acc and return as a string."
                (cond ((<= n 0) (coerce acc 'string))
                      (t (make-zeros (- n 1) (cons #\0 acc))))))
           (let ((slen (length s)))
             (cond ((< slen precision)
                    (concatenate 'string
                                 (make-zeros (- precision slen))
                                 s))
                   (t s)))))

       (trim-trailing-zeros (s)
         (labels
             ((iter (cs)
                "Iterate over the reversed list of characters in CS
removing the trailing zeros.  Because the list of characters is
reversed, the trailing zeros all appear at the front of the CS list.
This function simply removes one trailing zero from CS before calling
itself recursively with the remainder.  When one of the base cases is
reached, it reverses cs and coerces the result to a string."
                (cond
                  ;; First base case.  Note that if we reach the base
                  ;; case and the accumulator is still empty, it means
                  ;; all the digits were trailing zeros and were removed
                  ;; in which case we have to force #\0 to be returned.
                  ((null cs) "0")
                  ;; Second base case.  Any non-zero character means
                  ;; we have reached the end of the trailing zeros.
                  ((not (eql (car cs) #\0))
                   ;; You want to return strings like "1" instead of
                   ;; "1." so in this one case, we skip the trailing
                   ;; decimal point too.
                   (let ((target (if (eql (car cs) #\.) (cdr cs) cs)))
                     (coerce (reverse target) 'string)))
                  ;; Must be a trailing zero.
                  (t (iter (cdr cs))))))
           (if (find #\. s)
               ;; s has a decimal point.
               (iter (reverse (coerce s 'list)))
               ;; s does not have a decimal point so do not remove the
               ;; trailing digits because they are significant.
               s)))

       (insert-decimal-point (s precision)
         (let ((pivot (- (length s) precision)))
           (when (< pivot 0) (error "Internal error.  Pivot too small."))
           (concatenate 'string
                        (if (= pivot 0) "0" (subseq s 0 pivot))
                        (if (= precision 0)
                            ""
                            (concatenate 'string "." (subseq s pivot))))))

       (to-basic-string (n precision)
         "Convert the rational number N to the string representation
of a basic floating point number with PRECISION worth of digits after
the decimal point."
         (let*
              ;; x = round(abs(n) * 10^precision)
             ((x (common-round (* (abs n) (expt 10 precision))))
              (s (princ-to-string x))
              (s (add-zeros s precision))
              (s (insert-decimal-point s precision))
              (rv (if trim (trim-trailing-zeros s) s)))
           (if (< n 0) (concatenate 'string "-" rv) rv)))

       (to-sci-string (n precision)
         "Convert the rational number N to the string representation
of a floating point number in normalized scientific notation with
PRECISION worth of digits after the decimal point."
       (multiple-value-bind (x exponent) (normalize-rational n)
         (concatenate 'string
                      (to-basic-string x precision)
                      estr
                      (princ-to-string exponent)))))

    (when (minusp precision)
      (error "precision cannot be negative"))

    (if sci
        (to-sci-string n precision)
        (to-basic-string n precision))))

(defun string-to-rational (s)
  "Convert the string S to a rational number and return the result as
the first value.  The second value returned is the remainder of
S (i.e., the tail of S that is not part of the decimal number).  If an
error occurs NIL is returned as the first value, and S is returned as
the second value.  Leading white space is removed from S before
attempting to do the conversion.

For example

    (string-to-rational \" 123e-3foo\") --> (values (/ 123 1000) \"foo\")
"

  (declare (type string s))

  (multiple-value-bind (ok
                        sign
                        whole-part
                        fract-part
                        fract-part-digit-count
                        exp-part
                        remainder)
      (parse-decimal-string s)
    (if (not ok)
        (values nil s)
        (let ((magnitude (* (+ whole-part
                               (/ fract-part (expt 10 fract-part-digit-count)))
                            (expt 10 exp-part))))
          (if (eq sign :-)
              (values (- magnitude) remainder)
              (values magnitude remainder))))))
