;;;; This library contains a number of functions which collect and skip characters in a text file.
;;;; These functions may, for instance, be used to parse a file.<p>
;;;; It is assumed that the variable ip references an input port. The assignment of ip must be done
;;;; exernally to this library, and after the library is loaded.<p>
;;;; The main functions can be found in the section Collection and skipping functions below.<p>
;;;; This library has been developed as part of an SGML Document Type Definition (DTD) parser.
;;;; There exists <a href="../../tools/dtd-parser/doc/html/index.html">internal documentation</a> of the DTD parser,
;;;; as such also of some aspects of the functions in this library.


Show source file in small font In collect-skip: Link from ip to it's cross reference table entry 
(define ip #f) ; ======================================================================================================================== ;;; Look ahead buffer and queue. ;;; The functions in this section manipulates a look ahead queue, which is in between the input port ip ;;; and the applications. Via this buffer it is possible to implement look ahead in the input port. ;; The length of the cyclic look ahead buffer. Predefined to 2000 characters. Show source file in small font In collect-skip: Link from max-look-ahead to it's cross reference table entry 
(define max-look-ahead 2000) Show source file in small font In collect-skip: Link from look-ahead-vector to it's cross reference table entry 
(define look-ahead-vector (make-vector max-look-ahead #\space)) Show source file in small font In collect-skip: Link from next-write to it's cross reference table entry 
(define next-write 0) Show source file in small font In collect-skip: Link from next-read to it's cross reference table entry 
(define next-read 0) Show source file in small font In collect-skip: Link from look-ahead-length to it's cross reference table entry 
(define look-ahead-length 0) Show source file in small font In collect-skip: Link from end-of-file? to it's cross reference table entry 
(define end-of-file? #f) ;; Reset the look ahead buffer. Show source file in small font In collect-skip: Link from reset-look-ahead-buffer to it's cross reference table entry 
(define (reset-look-ahead-buffer) (set! ip #f) (set! next-write 0) (set! next-read 0) (set! look-ahead-length 0) (set! look-ahead-vector (make-vector max-look-ahead #\space)) (set! end-of-file? #f) (set! collection-buffer (make-string buffer-length #\space)) ) ;; Peek a character from the input port, but queues it for subsequent reading ;; at "the peek end". ;; This function always reads one character via read-char. Show source file in small font In collect-skip: Link from peek-a-char to it's cross reference table entry 
(define (peek-a-char) (let ((ch (read-char ip))) (if (eof-object? ch) (begin (set! end-of-file? #t) ch) (begin (vector-set! look-ahead-vector next-write ch) (set! next-write (+ 1 next-write)) (set! look-ahead-length (+ 1 look-ahead-length)) (if (> look-ahead-length max-look-ahead) (error "Lookahead buffer capacity exceeded")) (if (>= next-write max-look-ahead) (set! next-write 0)) ch)))) ;; Peeks n charcters Show source file in small font In collect-skip: Link from peek-chars to it's cross reference table entry 
(define (peek-chars n) (cond ((> n 0) (begin (let ((ch (peek-a-char))) (if (not (eof-object? ch)) (peek-chars (- n 1)))))) ((< n 0) (error "peek-chars: Called with negative argument")))) ;; Read from the the look ahead buffer. Only if this buffer is empty, read from the port. ;; Reads from "the read end" of the queue. Show source file in small font In collect-skip: Link from read-a-char to it's cross reference table entry 
(define (read-a-char) (if (> look-ahead-length 0) (let ((ch (vector-ref look-ahead-vector next-read))) (set! next-read (+ next-read 1)) (set! look-ahead-length (- look-ahead-length 1)) (if (>= next-read max-look-ahead) (set! next-read 0)) ch) (let ((ch (read-char ip))) (if (eof-object? ch) (set! end-of-file? #t)) ch))) ;; Read and return a string of length n. ;; Should take eof into account such that a string shorter than n can be returned. Show source file in small font In collect-skip: Link from read-a-string to it's cross reference table entry 
(define (read-a-string n) (let ((res (make-string n #\space))) (read-a-string-1 0 n res) res)) Show source file in small font In collect-skip: Link from read-a-string-1 to it's cross reference table entry 
(define (read-a-string-1 i n str) (cond ((>= i n) str) (else (begin (string-set! str i (read-a-char)) (read-a-string-1 (+ i 1) n str))))) ;; Return a lgt character string from the peeked chars in the queue. Show source file in small font In collect-skip: Link from look-ahead-prefix to it's cross reference table entry 
(define (look-ahead-prefix lgt) (if (>= look-ahead-length lgt) (look-ahead-prefix-1 0 next-read lgt (make-string lgt #\space)) (error (string-append "look-ahead-prefix: requires the look ahead to be in the queue, " (as-string lgt) )))) ; i is the index into the formed string. ; j is the index into the look-ahead queue ; lgt is the desired length of the extracted string ; res is the (tail recursive) result. Show source file in small font In collect-skip: Link from look-ahead-prefix-1 to it's cross reference table entry 
(define (look-ahead-prefix-1 i j n res) (if (>= i n) res (begin (string-set! res i (vector-ref look-ahead-vector j)) (look-ahead-prefix-1 (+ i 1) (if (= j (- max-look-ahead 1)) 0 (+ j 1)) ; A program source marker WITHOUT a link to the documentation n res)))) ;; Return the entire look ahead queue as a string Show source file in small font In collect-skip: Link from max-look-ahead-prefix to it's cross reference table entry 
(define (max-look-ahead-prefix) (look-ahead-prefix look-ahead-length)) ;; Return the first character in the look ahead vector. ;; As a precondition, the look ahead queue is assumed not to be empty Show source file in small font In collect-skip: Link from look-ahead-char to it's cross reference table entry 
(define (look-ahead-char) (if (>= look-ahead-length 1) (vector-ref look-ahead-vector next-read) (error "look-ahead-char: Cannot look ahead in emtpy look ahead queue"))) ;; Return whether the queue contents match the string str. ;; The queue must contain (length str) characters in order to call this function. ;; If not, an error is issued. ;; This is a proper function (appart from the error condition). Show source file in small font In collect-skip: Link from match-look-ahead? to it's cross reference table entry 
(define (match-look-ahead? str) (let* ((lgt (string-length str))) (if (>= look-ahead-length lgt) (equal? (look-ahead-prefix lgt) str) (error "match-look-ahead?: String matching requires sufficient peeked characters")))) ;; Make sure that there is at least n characters in the look ahead queue Show source file in small font In collect-skip: Link from ensure-look-ahead to it's cross reference table entry 
(define (ensure-look-ahead n) (if (< look-ahead-length n) (peek-chars (- n look-ahead-length)))) ; ---------------------------------------------------------------------------- ; Put back facility at the write end. Part of the look ahead queue. ; Alternatively - and more useful - put back should take place at the read end. ;; Put ch back at the rear end of the queue (where peek-a-char operates). Show source file in small font In collect-skip: Link from put-back-a-char-write-end to it's cross reference table entry 
(define (put-back-a-char-write-end ch) (vector-set! look-ahead-vector next-write ch) (set! next-write (+ 1 next-write)) (set! look-ahead-length (+ 1 look-ahead-length)) (if (> look-ahead-length max-look-ahead) (error "Lookahead buffer capacity exceeded")) (if (>= next-write max-look-ahead) (set! next-write 0))) ;; Put ch back at the front end of the "queue" (where read-a-char operates). Show source file in small font In collect-skip: Link from put-back-a-char-read-end to it's cross reference table entry 
(define (put-back-a-char-read-end ch) (if (<= next-read 0) (set! next-read (- max-look-ahead 1))) (set! look-ahead-length (+ look-ahead-length 1)) (if (>= look-ahead-length max-look-ahead) (error "Lookahead buffer capacity exceeded")) (set! next-read (- next-read 1)) (vector-set! look-ahead-vector next-read ch)) ;; Put str back in queue. The second parameter which-end controls whether to put back ;; in read end or write end. Possible values 'read-end and 'write-end. Show source file in small font In collect-skip: Link from put-back-a-string to it's cross reference table entry 
(define (put-back-a-string str which-end) (cond ((= 0 (string-length str)) 'nothing) ((eq? which-end 'write-end) (put-back-a-string-write-end str 0 (- (string-length str) 1))) ((eq? which-end 'read-end) (put-back-a-string-read-end str 0 (- (string-length str) 1))) (else (error "put-back-a-string: Unknown end indicator")))) Show source file in small font In collect-skip: Link from put-back-a-string-write-end to it's cross reference table entry 
(define (put-back-a-string-write-end str i max) (put-back-a-char-write-end (string-ref str i)) (if (< i max) (put-back-a-string-write-end str (+ i 1) max))) Show source file in small font In collect-skip: Link from put-back-a-string-read-end to it's cross reference table entry 
(define (put-back-a-string-read-end str min i) (put-back-a-char-read-end (string-ref str i)) (if (> i min) (put-back-a-string-read-end str min (- i 1)))) ;; Provided that there is at least n characters in the reading queue, advance ;; next-read with n positions. Hereby queued characters are skipped. Not used in dtd parsing. Show source file in small font In collect-skip: Link from advance-look-ahead to it's cross reference table entry 
(define (advance-look-ahead n) (if (> n look-ahead-length) (error (string-append "Cannot advance the look ahead with " (as-string n) " positions"))) (if (> n 0) (begin (set! next-read (+ next-read 1)) (set! look-ahead-length (- look-ahead-length 1)) (if (>= next-read max-look-ahead) (set! next-read 0)) (advance-look-ahead (- n 1))))) ; End of look ahead buffer (queue) ; ; ----------------------------------------------------- ;;; Collection and skipping functions. ;;; This section contains a number of higher level collection and skipping functions. ;;; These functions use the funtions from the previous section. The functions in this ;;; section are the most important of this library. Show source file in small font In collect-skip: Link from buffer-length to it's cross reference table entry 
(define buffer-length 10000) Show source file in small font In collect-skip: Link from collection-buffer to it's cross reference table entry 
(define collection-buffer (make-string buffer-length #\space)) ;; Return the string collected from the input port ip. ;; The collection stops when the predicate p holds holds on the character read. ;; The last read character (the first character on which p holds) is left as the oldest character in the queue. Show source file in small font In collect-skip: Link from collect-until to it's cross reference table entry 
(define (collect-until p) (collect-until-1 p ip collection-buffer 0) ) Show source file in small font In collect-skip: Link from collect-until-1 to it's cross reference table entry 
(define (collect-until-1 p ip buffer next) (cond ((>= next buffer-length) (error "collect-until-1: Collection buffer is filled. You can enlarge it via the variable buffer-length")) ((and (> look-ahead-length 0) (p (as-char (look-ahead-prefix 1)))) (substring buffer 0 next)) ((and (> look-ahead-length 0) (not (p (as-char (look-ahead-prefix 1))))) (let ((ch (read-a-char))) (string-set! buffer next ch) (collect-until-1 p ip buffer (+ 1 next)))) ((and (= look-ahead-length 0)) (let ((ch (peek-a-char))) (if (p ch) (substring buffer 0 next) (begin (string-set! buffer next ch) (read-a-char) (collect-until-1 p ip buffer (+ 1 next)))))))) ;; This collection procedure returns a balanced collection given two char predicates. ;; Return the string collected from the input port ip. The collection stops when the predicate char-pred-2 holds holds on the character read. ;; However, if char-pred-1 becomes true it has to be matched by char-pred-2 without causing a termination of the collection. ;; The last read character (the first character on which char-pred-2 holds) is processed by this function. ;; As a precondition assume that if char-pred-1 holds then char-pred-2 does not hold, and vice versa. Show source file in small font In collect-skip: Link from collect-balanced-until to it's cross reference table entry 1.2. Handling white space, comments, and front matters.
(define (collect-balanced-until char-pred-1 char-pred-2) (collect-balanced-until-1 char-pred-1 char-pred-2 ip collection-buffer 0 0)) Show source file in small font In collect-skip: Link from collect-balanced-until-1 to it's cross reference table entry 
(define (collect-balanced-until-1 q p ip buffer next bal-count) (ensure-look-ahead 1) (cond ((>= next buffer-length) (parse-error "collect-until-1: Collection buffer is filled. You can enlarge it via the variable buffer-length")) ((and (p (as-char (look-ahead-prefix 1))) (= bal-count 0)) (parse-error "End delimitor matched before start delimitor")) ((and (p (as-char (look-ahead-prefix 1))) (= bal-count 1)) (string-set! buffer next (read-a-char)) (substring buffer 0 (+ next 1))) ((and (p (as-char (look-ahead-prefix 1))) (> bal-count 1)) (let ((ch (read-a-char))) (string-set! buffer next ch) (collect-balanced-until-1 q p ip buffer (+ 1 next) (- bal-count 1)))) ((and (q (as-char (look-ahead-prefix 1)))) (let ((ch (read-a-char))) (string-set! buffer next ch) (collect-balanced-until-1 q p ip buffer (+ 1 next) (+ bal-count 1)))) ((and (not (p (as-char (look-ahead-prefix 1)))) (not (q (as-char (look-ahead-prefix 1))))) (let ((ch (read-a-char))) (string-set! buffer next ch) (collect-balanced-until-1 q p ip buffer (+ 1 next) bal-count))))) ;; Skip characters while p holds. ;; The first character on which p fails is left as the oldest character in the queue ;; The predicate does not hold if end of file Show source file in small font In collect-skip: Link from skip-while to it's cross reference table entry 
(define (skip-while p) (cond ((and (not end-of-file?) (> look-ahead-length 0) (p (as-char (look-ahead-prefix 1)))) (begin (read-a-char) (skip-while p))) ((and (not end-of-file?) (= look-ahead-length 0)) (begin (peek-a-char) (if (and (not end-of-file?) (p (as-char (look-ahead-prefix 1)))) (begin (read-a-char) (skip-while p))))))) ;; Assume that str is just in front of us. Skip through it. ;; If str is not in front of us, a fatal error occurs with if-not-message as error message. Show source file in small font In collect-skip: Link from skip-string to it's cross reference table entry 
(define (skip-string str if-not-message) (let ((str-1 (read-a-string (string-length str)))) (if (not (equal? str str-1)) (error if-not-message)))) ;; Skip characters until str is encountered. If inclusive, also skip str. ;; It is assumed as a precondition that the length of str is at least one. Show source file in small font In collect-skip: Link from skip-until-string to it's cross reference table entry 
(define (skip-until-string str . inclusive) (let* ((str-lgt (string-length str)) (first-ch (string-ref str 0)) (incl (if (null? inclusive) #f (car inclusive)))) (skip-until-string-1 str str-lgt first-ch incl))) Show source file in small font In collect-skip: Link from skip-until-string-1 to it's cross reference table entry 
(define (skip-until-string-1 str str-lgt first-ch incl) (skip-while (negate (char-predicate first-ch))) (ensure-look-ahead str-lgt) (if (equal? (look-ahead-prefix str-lgt) str) (if incl (read-a-string str-lgt)) (begin (read-a-char) ; eat the matched first char (skip-until-string-1 str str-lgt first-ch incl)))) ;; Collect characters until str is encountered. If inclusive, also collect str. ;; It is assumed as a precondition that the length of str is at least one. Show source file in small font In collect-skip: Link from collect-until-string to it's cross reference table entry 
(define (collect-until-string str . inclusive) (let* ((str-lgt (string-length str)) (first-ch (string-ref str 0)) (incl (if (null? inclusive) #f (car inclusive)))) (collect-until-string-1 str str-lgt first-ch incl))) Show source file in small font In collect-skip: Link from collect-until-string-1 to it's cross reference table entry 
(define (collect-until-string-1 str str-lgt first-ch incl) (let ((res (collect-until (char-predicate first-ch)))) (ensure-look-ahead str-lgt) (if (equal? (look-ahead-prefix str-lgt) str) (if incl (string-append res (read-a-string str-lgt)) res) (string-append res (as-string (read-a-char)) (collect-until-string-1 str str-lgt first-ch incl))))) ; ---------------------------------------------- ;;; Useful predicates for skipping and collecting. ;; Is ch a white space character? Show source file in small font In collect-skip: Link from is-white-space? to it's cross reference table entry 
(define (is-white-space? ch) (if (eof? ch) #f (let ((n (as-number ch))) (or (eq? n 32) (eq? n 9) (eq? n 10) (eq? n 12) (eq? n 13))))) ;; Is ch an end of line charcter? Show source file in small font In collect-skip: Link from end-of-line? to it's cross reference table entry 
(define (end-of-line? ch) (if (eof? ch) #f (let ((n (as-number ch))) (or (eq? n 10) (eq? n 13))))) ;; Is ch an end of file character? Show source file in small font In collect-skip: Link from eof? to it's cross reference table entry 
(define (eof? ch) (eof-object? ch)) ;; Return a predicate functions which matches the character ch. ;; A higher order function. Show source file in small font In collect-skip: Link from char-predicate to it's cross reference table entry 
(define (char-predicate ch) (lambda (c) (eq? c ch)))