From: Wheeler Ruml

Subject: RE: numbers and I/O

Date: 1996-8-23 17:03

> I would certainly like to have your fns for reading numbers from an > input stream.
Please let me know if you make any improvements or find any bugs! Thanks, Wheeler -- Wheeler Ruml, Aiken 220, <eecs.harvard.edu, at ruml> (617) 496-1066 (fax) http://www.das.harvard.edu/users/students/Wheeler_Ruml/Wheeler_Ruml.html ====================== ;; This is: ;; $Source: /home/cellar/shieber/ruml/src/lisp/wr-utils/RCS/parse-nums.lisp,v $ ;; $Revision: 1.5 $ ;; $Date: 1996/03/24 18:45:06 $ ;;; Functions for quickly reading numbers from strings and streams. (in-package wr-utils) ;;;;;;;;;;;;; reading integers from a stream (defun eat-read-int (&optional (strm *standard-input*) (err-val nil)) "eats whitespace, then read an integer" (eat-whitespace strm) (read-int strm err-val)) (defun fast-eat-read-int (&optional (strm *standard-input*) (err-val nil)) "eats whitespace, then read an integer" (ib-eat-whitespace strm) (read-int strm err-val 10 #'ib-peek #'ib-read #'ib-unread)) (defun read-int (strm &optional (err-val nil) (radix 10) (peek-func #'peek-char) (read-func #'read-char) (unread-func #'unread-char)) "Reads chars from STRM, trying to parse an integer (with optional starting sign. Does not skip whitespace. Returns the integer. If no digits are read, returns ERR-VAL (defaults to NIL)." (let ((negated-p nil)) (declare (type (or nil t) negated-p)) (let ((ch (funcall peek-func nil strm nil nil))) (declare (type (or null character) ch) (type fixnum radix)) (cond ((char= ch #\-) (funcall read-func strm) (setf negated-p t)) ((char= ch #\+) (funcall read-func strm)))) (let ((num 0) (something-read nil)) (declare (type integer num) (type (or nil t) something-read)) (loop (let ((ch (funcall read-func strm nil nil))) (declare (type (or null character) ch)) (if ch (let ((digit (digit-char-p ch radix))) (declare (type fixnum digit)) (cond (digit (setf something-read t) (setf num (+ (* num radix) digit))) (t (funcall unread-func ch strm) (return)))) (return)))) (if something-read (if negated-p (- num) num) err-val)))) (defun fast-eat-read-fixnum (&optional (strm *standard-input*)) "eats whitespace, then read an integer" (ib-eat-whitespace strm) (ib-read-fixnum strm)) (defun ib-read-fixnum (strm) "Reads chars from STRM, trying to parse a base-10 fixnum (optional starting sign). Skips starting whitespace. Returns the integer. If no digits read, returns NIL." (let ((negated-p nil)) (declare (type (or nil t) negated-p)) ;; read sign (let ((ch (ib-peek nil strm nil nil))) (declare (type (or null character) ch)) (cond ((char= ch #\-) (ib-read strm) (setf negated-p t)) ((char= ch #\+) (ib-read strm)))) (let ((num 0) (something-read nil)) (declare (type fixnum num) (type (or nil t) something-read)) ;; read digits (loop (let ((ch (ib-read strm nil nil))) (declare (type character ch)) (if ch (let ((digit (digit-char-p ch))) (declare (type fixnum digit)) (cond (digit (setf something-read t) (setf num (the fixnum (+ (the fixnum (* num 10)) digit)))) (t (ib-unread ch strm) (return)))) (return)))) ;; return result (when something-read (if negated-p (- num) num))))) ;;;;;;;;;;;; reading floats from a stream (defun eat-read-float (strm &optional (err-val nil)) "eats whitespace, then read a float" (eat-whitespace strm) (read-float strm err-val)) (defun other-eat-read-float (strm &optional (err-val nil)) "eats whitespace, then read a float" (eat-whitespace strm) (other-read-float strm err-val)) (defun fast-eat-read-float (strm &optional (err-val nil)) "eats whitespace, then read a float" (ib-eat-whitespace strm) (read-float strm err-val :peek-func #'ib-peek :read-func #'ib-read :unread-func #'ib-unread)) (defun read-float (&optional (strm *standard-input*) (err-val nil) &key (int-radix 10) (peek-func #'peek-char) (read-func #'read-char) (unread-func #'unread-char)) "reads a float from STRM. Makes lots of assumptions to speed processing! Based on PARSE-FLOAT by <cs.cmu.edu." at mkant> (declare (type (integer 2 36) int-radix)) (let ((negated-p nil) (decimal-p nil) (found-digit-p nil) (before-decimal 0.0d0) (after-decimal 0.0d0) (exponent 0) (decimal-counter 0) (radix (float int-radix 1.0d0)) (result 0.0s0)) (declare (type double-float before-decimal after-decimal radix result) (type fixnum exponent) (type fixnum decimal-counter)) ;; Take care of optional sign. (let ((char (funcall peek-func nil strm nil nil))) (declare (type character char)) (cond ((char= char #\-) (setq negated-p t) (funcall read-func strm)) ((char= char #\+) (funcall read-func strm)))) (loop (let ((char (funcall read-func strm nil nil))) (declare (type character char)) (if char (let ((weight (digit-char-p char int-radix))) (declare (type fixnum weight)) (cond ((and weight (not decimal-p)) ;; A digit before the decimal point (setf before-decimal (+ (float weight 1.0d0) (* before-decimal radix))) (setf found-digit-p t)) ((and weight decimal-p) ;; A digit after the decimal point (setf after-decimal (+ (float weight 1.0d0) (* after-decimal radix))) (setf found-digit-p t) (incf decimal-counter)) ((and (char= char #\.) (not decimal-p)) ;; The decimal point (setq decimal-p t)) ((or (char-equal char #\E) (char-equal char #\D) (char-equal char #\F) (char-equal char #\S) (char-equal char #\L)) ;; E is for exponent (setf exponent (read-int strm err-val int-radix peek-func read-func unread-func)) (return)) (t (funcall unread-func char strm) (return)))) (return)))) ;; Cobble up the resulting number (let* ((frac-fact (expt radix (float (- decimal-counter) 1.0d0))) (number-part (+ before-decimal (* after-decimal frac-fact))) (num-fact (expt radix (float exponent 1.0d0))) (num (* number-part num-fact))) (declare (type double-float frac-fact number-part num-fact num)) (setf result num)) ;; Return the result (values (if found-digit-p (if negated-p (- result) result) err-val)))) ;;;;;;;;;;;;;;; another function for reading floats from a stream (defun other-read-float (strm &optional (err-val nil)) "Reads chars from STRM, trying to parse a number in floating point format. Accepts roughyl [+-]digits[.[+-]digits[[ESLDFeslfd][+-]digits]]. The purpose is to be much faster than read! Does not skip whitespace." (let ((number (read-int strm nil))) (cond (number ;;(format t "starting with int-part ~A.~%" number) (or (read-exp-part strm number nil) (read-frac-part strm number nil) number)) (t err-val)))) ;; helper functions for other read-float (defun read-frac-part (strm number err-val) "tries to read the rest of a floating point number (from decimal point onwards). Returns ERR-VAL if nothing number-like to read, else augements number with right of decimal point and tries to read exponent. If exponent, returns complete number, else number with fractional part." (declare (type stream strm) (type integer number)) (let ((ch (read-char strm nil nil))) (declare (type character ch) (dynamic-extent ch)) (cond ((eq ch #\.) (multiple-value-bind (frac-part places) (read-int strm) (declare (type integer frac-part places) (dynamic-extent frac-part places)) (cond (frac-part (incf number (/ frac-part (expt 10 places))) ;;(format t "Got frac part, now ~A.~%" number) (or (read-exp-part strm number err-val) number))))) (t (unread-char ch strm) err-val)))) (defun read-exp-part (strm number err-val) "tries to read the exponent part of NUMBER from STRM. If succeeds, returns modified NUMBER, else ERR-VAL." (declare (stream strm)) (let ((ch (read-char strm nil nil))) (declare (type character ch) (dynamic-extent ch)) (cond ((null ch) err-val) ((or (char-equal ch #\E) (char-equal ch #\S) (char-equal ch #\F) (char-equal ch #\D) (char-equal ch #\L)) (let ((exponent (read-int strm))) (declare (type integer exponent) (dynamic-extent exponent)) (cond (exponent (setf number (* number (expt 10 exponent))) number) ;; can't back up over E, so error (t err-val)))) (t (unread-char ch strm) err-val)))) ;;;;;;;;;;;; testing functions for reading numbers from streams ;; testing function for read-float (defun trf (str &key (max 1) (func #'read-float)) (with-input-from-string (s str) (let ((i 0)) (do () ((= i max)) (eat-whitespace s) (incf i) (let ((n (funcall func s))) (cond ((numberp n) (format t "Got number: ~A.~%" n)) ((null n) (format t "Function failed.~%") (return)) (t (format t "Function returned odd value: ~A~%" n) (return))))) (format t "Did ~D of ~D iterations. `~A' remains in string.~%" i max (get-waiting-stream-string s))))) ;; 22+0 secs on 4e5 ints (0-9999) (defun test1 (&optional (filename "ints.1")) (with-open-file (f filename :direction :input) (while (eat-read-int f)))) ;; 11+0 secs on 4e5 ints (defun test2 (&optional (filename "ints.1")) (with-open-ib (f filename) (while (fast-eat-read-int f)))) ;; 7+0 secs on 4e5 ints (defun test2.1 (&optional (filename "ints.1")) (with-open-ib (f filename) (while (fast-eat-read-fixnum f)))) ;; 29+1 secs on 4e5 ints (defun test3 (&optional (filename "ints.1")) (with-open-file (f filename :direction :input) (loop (let ((l (read-line f nil nil))) (if l (parse-integer l) (return)))))) ;; 7+1 secs on 5e4 floats between 0-1 (defun test1f (&optional (filename "floats.0")) (with-open-file (f filename :direction :input) (while (eat-read-float f)))) (defun test1.1f (&optional (filename "floats.0")) (with-open-file (f filename :direction :input) (while (other-eat-read-float f)))) ;; 5+1 secs on 5e4 floats between 0-1 (defun test2f (&optional (filename "floats.0")) (with-open-ib (f filename) (while (fast-eat-read-float f)))) ;; 12+1 secs on 5e4 floats between 0-1 (defun test3f (&optional (filename "floats.0")) (with-open-file (f filename :direction :input) (loop (let ((l (read-line f nil nil))) (if l (parse-float l) (return)))))) ;;;;;;;;;;;;;;;;; reading floats from a string ;; ;; Based on version from CMU Lisp repository, please see mkant.copyright ;; for copyright on original version of this function. (defun parse-float (string &key (start 0) (end nil) (radix 10) (type 'single-float) (junk-allowed nil)) "Converts a substring of STRING, as delimited by START and END, to a floating point number, if possible. START and END default to the beginning and end of the string. RADIX must be between 2 and 36. A floating point number will be returned if the string consists of an optional string of spaces and an optional sign, followed by a string of digits optionally containing a decimal point, and an optional e or E followed by an optionally signed integer. The use of e/E to indicate an exponent only works for RADIX = 10. Returns the floating point number, if any, and the index for the first character after the number." ;; END defaults to the end of the string ;; We don't accomplish this by sticking (end (length string)) in the ;; lambda list because I've encountered too many implementations that ;; don't handle such properly. Also, this will work ok if somebody calls ;; the function with :end nil. (setq end (or end (length string))) ;; Skip over whitespace. If there's nothing but whitespace, signal an error. (let ((index (or (position-if-not #'whitespace-p string :start start :end end) (if junk-allowed (return-from parse-float (values nil end)) (error "No non-whitespace characters in number.")))) (minusp nil) (decimalp nil) (found-digit nil) (before-decimal 0) (after-decimal 0) (decimal-counter 0) (exponent 0) (result 0)) (declare (fixnum index)) ;; Take care of optional sign. (let ((char (char string index))) (cond ((char= char #\-) (setq minusp t) (incf index)) ((char= char #\+) (incf index)))) (loop (when (= index end) (return nil)) (let* ((char (char string index)) (weight (digit-char-p char radix))) (cond ((and weight (not decimalp)) ;; A digit before the decimal point (setq before-decimal (+ weight (* before-decimal radix)) found-digit t)) ((and weight decimalp) ;; A digit after the decimal point (setq after-decimal (+ weight (* after-decimal radix)) found-digit t) (incf decimal-counter)) ((and (char= char #\.) (not decimalp)) ;; The decimal point (setq decimalp t)) ((and (or (char-equal char #\E) (char-equal char #\D) (char-equal char #\F) (char-equal char #\S) (char-equal char #\L)) (= radix 10)) ;; E is for exponent (multiple-value-bind (num idx) (parse-integer string :start (1+ index) :end end :radix radix :junk-allowed junk-allowed) (setq exponent (or num 0) index idx) (when (= index end) (return nil)))) (junk-allowed (return nil)) ((whitespace-p char) (when (position-if-not #'whitespace-p string :start (1+ index) :end end) (error "There's junk in this string: ~S." string)) (return nil)) (t (error "There's junk in this string: ~S." string)))) (incf index)) ;; Cobble up the resulting number (setq result (coerce (* (+ before-decimal (* after-decimal (coerce (expt radix (- decimal-counter)) 'double-float))) (coerce (expt radix exponent) 'double-float)) type)) ;; Return the result (values (if found-digit (if minusp (- result) result) (if junk-allowed nil (error "There are no digits in this string: ~S" string))) index))) ;;; EOF - parse-nums.lisp ================================== ;; This is: ;; $Source: /home/cellar/shieber/ruml/src/lisp/wr-utils/RCS/fast-io.lisp,v $ ;; $Revision: 1.5 $ ;; $Date: 1996/03/29 22:38:36 $ ;;; Functions for fast but inflexible I/O. ;;; ;;; Slightly modified version of code from Thomas Kirk <research.att.com at tk> ;;; posted on comp.lang.lisp on Tue, 23 Feb 1993 22:58:39 GMT and now ;;; found at the CMU Lisp Repository. ;;; ;;; Seems a better route than the FAST-READ-FILE-CHAR of Doug Cutting ;;; <parc.xerox.com at cutting> posted to <ucbvax.berkeley.edu at allegro-cl> on ;;; Tue, 23 Feb 1993 13:38:19 PST and available in CL-LIB from rochester, ;;; which peeks inside ACL stream instances (using svref, now disallowed) ;;; to get characters. (in-package wr-utils) ;;;;;;;;; data structure ;; used in structure definition and when opening stream (deftype ib-element () `character) (deftype ib-index () `(integer 0 4096)) (defstruct ib (stream nil) (buf (make-array 4096 :element-type 'ib-element) :type (simple-array ib-element (4096))) (nbytes 0 :type ib-index)while (ib-read-line stream) (offset 0 :type ib-index) (unread-element nil)) ;;;;;;;;;; low-level operations ;; seems to take about 57% as long as read-char (defun ib-read (ib &optional (eof-error-p t) (eof-value nil)) (declare (type ib ib)) (let ((unread (ib-unread-element ib))) (cond (unread (prog1 unread (setf (ib-unread-element ib) nil))) (t (unless (< (ib-offset ib) (ib-nbytes ib)) (ib-fill-buffer ib) (when (zerop (ib-nbytes ib)) (if eof-error-p (error "Attempt to IB-READ past EOF!") (return-from ib-read eof-value)))) (prog1 (aref (ib-buf ib) (ib-offset ib)) (incf (ib-offset ib))))))) (defun ib-fill-buffer (ib) "not intended to be called by user" (declare (type ib ib)) (setf (ib-nbytes ib) (read-sequence (ib-buf ib) (ib-stream ib))) (setf (ib-offset ib) 0) (values)) (defun ib-unread (element ib) (declare (type ib ib)) (if (ib-unread-element ib) (error "Can only unread one element from an IB!") (setf (ib-unread-element ib) element)) (values)) (defun ib-peek (peek-type ib &optional (eof-error-p t) (eof-value nil)) "Just like read, except doesn't advance the offset" (declare (type ib ib)) (cond ((eq peek-type t) (ib-eat-whitespace ib)) ((characterp peek-type) (error "Character peek-types not yet supported!"))) (let ((unread (ib-unread-element ib))) (cond (unread unread) (t (unless (< (ib-offset ib) (ib-nbytes ib)) (ib-fill-buffer ib) (when (zerop (ib-nbytes ib)) (if eof-error-p (error "Attempt to IB-READ past EOF!") (return-from ib-peek eof-value)))) (aref (ib-buf ib) (ib-offset ib)))))) ;;;;;;;; medium-level interface (defun open-ib (filename) (let ((stream (open filename :direction :input :element-type 'ib-element))) (make-ib :stream stream))) (defun close-ib (ib) (close (ib-stream ib))) (defmacro with-open-ib ((ib-var file-name) &body body) `(let ((,ib-var (open-ib ,file-name))) (declare (type ib ,ib-var)) (unwind-protect (progn <body) at ,> (close-ib ,ib-var)))) (defun ib-from-stream (stream) (make-ib :stream stream)) (defmacro with-ib-from-stream ((ib-var stream) &body body) "using this macro helps prevent ib from getting out of sync with stream" `(let ((,ib-var (make-ib :stream ,stream))) ;; REBIND ORDINARY STREAM FUNCTIONS?? <body)) at ,> (defun ib-eat-whitespace (ib) (loop (let ((ch (ib-read ib nil nil))) (unless (whitespace-p ch) (ib-unread ch ib) (return))))) (defun ib-skip-line (ib) (loop (let ((ch (ib-read ib))) (cond ((null ch) (return nil)) ((char= ch #\newline) (return t)))))) (defun ib-skip-token (ib) (ib-eat-whitespace ib) (loop (let ((ch (ib-read ib))) (when (whitespace-p ch) (ib-unread ch ib) (return))))) (defconstant *ib-read-line-length* 256) (defconstant *max-ib-read-line-length* 256) (defun ib-read-line (ib &optional (eof-error-p t) (eof-value nil) (recursive-p nil) &key (start-line-len *ib-read-line-length*) (max-line-len *max-ib-read-line-length*) (skip-remaining-line t)) (declare (ignore recursive-p)) (let ((line (make-array start-line-len :element-type 'character)) (line-len start-line-len) (index 0)) (declare (type (basic-vector character) line) (type fixnum index line-len)) (loop (let ((ch (ib-read ib eof-error-p eof-value))) (cond ((eq ch eof-value) (if (> index 0) (subseq line 0 index) (return eof-value))) ((char= ch #\newline) (return (subseq line 0 index))) (t (when (= index line-len) (let ((new-len (* 2 line-len))) (when (> new-len max-line-len) (when skip-remaining-line (ib-skip-line ib)) (return line)) (setf line-len new-len) (setf line (clone-array line new-len :elt-type character)))) (setf (aref line index) ch) (incf index))))))) (defun ib-eat-newline-p (strm) (let ((ch (ib-read strm nil nil))) (declare (type (or character null) ch)) (cond ((null ch) nil) ((char= ch #\Newline) t) ((whitespace-p ch) nil) (t (ib-unread ch strm))))) ;;;;;;; testing ;;; read n chars from file (defun test-ib (file n) (time (with-open-ib (ib file) (dotimes (i n) (ib-read ib)))) (time (with-open-file (f file) (dotimes (i n) (read-char f))))) ;;; EOF - fast-io.lisp