; The LAML library and programs are written by Kurt Normark, Aalborg University, Denmark.
; Copyright (C) 1999  Kurt Normark, normark@s.auc.dk.
;
; This program is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

;;;; This is a simple, non-validating XML parser for LAML together with XML pretty printing support.
;;;; As of the current version, the parser is not complete. Nevertheless, it is useful tool for parsing most
;;;; everyday XML documents to a Lisp data structure. <p> Given a well-formed XML document
;;;; this parser returns a Lisp tree structure that represents the parse tree of the XML document.
;;;; The parser handles start tags, end tags, and empty tags (in this parser called start-end tags).
;;;; Entities and their declarations are not handled at all.<p>
;;;; The top level functions are xml-parse and xml-parse-file. The xml-parser can be loaded as a library as well.<p>
;;;; There exists <a href="../doc/xml-parser.html" target="_top">elucidative documentation</a> of this parser.
;;;; See also <a href="html-support.html">the HTML parsing and pretty printing support</a>, which is
;;;; built on top of the XML tools, and the illustrative 
;;;; <a href="../../../examples/xml-parser-and-pretty-printer/index.html">examples </a> 
;;;; of the XML parser and pretty printer.<p>
;;;; This tool assumes that <kbd> laml.scm </kbd> and the general library are loaded. 
;;;; The tool loads the collect-skip and the file-read libraries.<p>
;;;; The typographical rebreaking and re-indenting of running text is still missing.<p>
;;;; The LAML interactive tool procedures <kbd> xml-pp </kbd> and <kbd> xml-parse </kbd>
;;;; in <kbd> laml.scm </kbd> are convenient top-level pretty printing and parse procedures respectively.<p>
;;;; Please notice that this is not a production quality parser and pretty printer! It is currently used for
;;;; internal purposes.<p>
;;;; From LAML version 20, the XML pretty printing in lib/xml-in-laml/xml-in-laml.scm replaces the XML pretty printing
;;;; in this library.
;;;; .title Reference Manual of the XML parser and pretty printer for LAML

(lib-load "collect-skip.scm")
(lib-load "file-read.scm")


;;; The format of the parse tree.
;;; A <em>parse tree</em> T  produced by this tool is of the form
;;; <pre>    (tree N ST1 ST2 ... STn) </pre>
;;; where STi, i=1..n are parse trees (recursively) and N is a node (see below). <p>
;;; A leaf node N may be of the form
;;; <pre>    (tree N) </pre>
;;; or just N if N is a string (corresponding to textual contents) or an empty tag (a tag without contents).<p>
;;; An <em>inner node</em> of a parse tree corresponds to a tag (an element) with contents. Such a node is represented
;;; by the following 'tag structure':
;;; <pre>    (tag kind tag-name . attr-info) </pre>
;;; tag is a symbol (for tagging). kind is either start or start-end (both symbols).
;;; tag-name is a string. Attr-info is the attribute on property list format.<p>
;;; A <em>terminal node</em> may be a start-end node, a comment node or just a contents string. End tags are not represented 
;;; in the parse tree.<p>
;;; Here is an example of a start-end node (empty node) with two properties:
;;; <pre>    (tag start-end "title" role "xxx" size "5") </pre>
;;; Comments are represented as comment nodes of the form
;;; <pre>    (comment comment-string) </pre> <p>
;;; Declaration nodes of the form
;;; <pre>    (declaration kind value) </pre>
;;; are also possible. They are for instance used for document type (???) information in HTML. Finally nodes of the form
;;; <pre>    (xml-declaration attribute-property-list) </pre>
;;; are supported.

; ---------------------------------------------------------------------------------------------------
; Parse specific error and message funtions.
Show source file in small font In xml-parser: Link from parse-error to it's cross reference table entry 
(define (parse-error . x) (display-message (string-append "PARSE ERROR: " (apply string-append (map as-string x)))) (parser-status) (error "STOPPING THE PARSER")) Show source file in small font In xml-parser: Link from parse-message to it's cross reference table entry 
(define (parse-message . x) (if xml-parse-verbose (display-message (string-append (apply string-append (map as-string x)))))) ; --------------------------------------------------------------------------------------------------- ; Overall functions Show source file in small font In xml-parser: Link from skip-white-space to it's cross reference table entry 1.2. Handling white space, comments, and front matters.
(define (skip-white-space) (skip-while is-white-space?)) ; Skip white space and XML commentsShow source file in small font In xml-parser: Link from skip-white-space-and-comments to it's cross reference table entry 1.2. Handling white space, comments, and front matters.
(define (skip-white-space-and-comments) (skip-white-space) (ensure-look-ahead 4) (if (and (not end-of-file?) (match-look-ahead? "<!--")) (begin (parse-message "Skipping comment") (read-a-string 4) (skip-until-string "-->" #t) (skip-white-space-and-comments)))) ; ---------------------------------------------------------------------------------------------------;;; Native low-level level parser functions. ;;; The top level parser functions in this section deliver specialized low level parse trees (in some ad hoc list structure). ;; Top level parse function which takes an XML file name as input, and delivers a parse tree on out-file-path. ;; file-path is a file path (relative or absolute) with or without an extension. The default extension is xml. ;; The parse tree is written on the file out-file-path.Show source file in small font In xml-parser: Link from parse-xml-file to it's cross reference table entry 1.1. The start
(define (parse-xml-file in-file-path out-file-path) (let ((init-in-path (file-name-initial-path in-file-path)) (in-file-name-prop (file-name-proper in-file-path)) (in-ext (file-name-extension in-file-path)) ) (reset-xml-parser) (let* ((input-port (open-input-file (string-append init-in-path in-file-name-prop "." (if (empty-string? in-ext) "xml" in-ext))))) (set! ip input-port) (let ((parse-tree (parse-xml-ip)) (target-file-name out-file-path)) (set! resulting-parse-tree parse-tree) (if (file-exists? target-file-name) (delete-file target-file-name)) (let ((op (open-output-file target-file-name))) (write parse-tree op) (close-output-port op))) (display-message (string-append "DONE. The parse tree is in " out-file-path)) (display-message "Use (scheme-pp <file>) to pretty pring the parse tree.") (display-message "The result is also in the variable resulting-parse-tree for interactive use.") (close-input-port ip)))) ;; This function parses a file and returns the parse tree. ;; file-path is a file path (relative or absolute) without any extension. ;; .returns The parse tree in the original, low level parse tree format (a list structure)Show source file in small font In xml-parser: Link from parse-xml to it's cross reference table entry 1.1. The start
(define (parse-xml file-path) (let ((init-path (file-name-initial-path file-path)) (file-name-prop (file-name-proper file-path)) (ext (file-name-extension file-path)) ) (reset-xml-parser) (let* ((input-port (open-input-file (string-append init-path file-name-prop "." (if (empty-string? ext) "xml" ext))))) (set! ip input-port) (let ((parse-tree (parse-xml-ip))) (close-input-port ip) parse-tree)))) ;; This function parses a string with XML contents and returns the parse tree. ;; xml-string is a string with xml contents ;; .returns The parse tree in the original, low level parse tree format (a list structure)Show source file in small font In xml-parser: Link from parse-xml-string to it's cross reference table entry 
(define (parse-xml-string xml-string) (reset-xml-parser) (set! ip xml-string) (let ((parse-tree (parse-xml-ip))) parse-tree)) Show source file in small font In xml-parser: Link from reset-xml-parser to it's cross reference table entry 
(define (reset-xml-parser) (reset-look-ahead-buffer) (set! parse-stack '())) ; ---------------------------------------------------------------------------------------------------;;; AST-level parser functions. ;;; The top level parser functions in this section deliver XML-in-LAML abstract syntax trees. ;;; These trees are much more useful than the low-level parse trees delivered by the functions in the previous section. ;; Top level parse function which takes an XML file name as input, and delivers an XML-in-LAML AST on out-file-path. ;; The AST is written on the file out-file-path. ;; .parameter in-file-path a file path (relative or absolute) with or without an extension. The default extension is xml. ;; .parameter out-file-path path the output file. ;; .parameter xml-language the name of the XML language in LAML, to which the resulting AST belongs. A symbol.Show source file in small font In xml-parser: Link from parse-xml-file-to-ast to it's cross reference table entry 
(define (parse-xml-file-to-ast in-file-path out-file-path xml-language) (let ((init-in-path (file-name-initial-path in-file-path)) (in-file-name-prop (file-name-proper in-file-path)) (in-ext (file-name-extension in-file-path)) ) (reset-xml-parser) (let* ((input-port (open-input-file (string-append init-in-path in-file-name-prop "." (if (empty-string? in-ext) "xml" in-ext))))) (set! ip input-port) (let ((parse-tree (parse-tree-to-ast (parse-xml-ip) xml-language)) (target-file-name out-file-path)) (set! resulting-parse-tree parse-tree) (if (file-exists? target-file-name) (delete-file target-file-name)) (let ((op (open-output-file target-file-name))) (write parse-tree op) (close-output-port op))) (display-message (string-append "DONE. The XML-in-LAML AST is in " out-file-path)) (display-message "The result is also in the variable resulting-parse-tree for interactive use.") (close-input-port ip)))) ;; This function parses an XML file and returns the corresponding XML-in-LAML AST. ;; .parameter file-path a file path (relative or absolute) without any extension. ;; .parameter xml-language the name of the XML language in LAML, to which the resulting AST belongs. A symbol. ;; .returns An XML-in-LAML AST.Show source file in small font In xml-parser: Link from parse-xml-to-ast to it's cross reference table entry 
(define (parse-xml-to-ast file-path xml-language) (let ((init-path (file-name-initial-path file-path)) (file-name-prop (file-name-proper file-path)) (ext (file-name-extension file-path)) ) (reset-xml-parser) (let* ((input-port (open-input-file (string-append init-path file-name-prop "." (if (empty-string? ext) "xml" ext))))) (set! ip input-port) (let ((parse-tree (parse-tree-to-ast (parse-xml-ip) xml-language))) (close-input-port ip) parse-tree)))) ;; This function parses a string with XML contents and returns an XML-in-LAML AST. ;; .parameter xml-string a string with xml contents. ;; .parameter xml-language the name of the XML language in LAML, to which the resulting AST belongs. A symbol. ;; .returns An XML-in-LAML AST.Show source file in small font In xml-parser: Link from parse-xml-string-to-ast to it's cross reference table entry 
(define (parse-xml-string-to-ast xml-string xml-language) (reset-xml-parser) (set! ip xml-string) (let ((parse-tree (parse-tree-to-ast (parse-xml-ip) xml-language))) parse-tree)) ; --------------------------------------------------------------------------------------------------- ;::parse-stack:: ; Parse state: the parse stack Show source file in small font In xml-parser: Link from xml-parse-verbose to it's cross reference table entry 
(define xml-parse-verbose #f) Show source file in small font In xml-parser: Link from parse-stack to it's cross reference table entry 1.1. The start
(define parse-stack '()) Show source file in small font In xml-parser: Link from parse-stack-push to it's cross reference table entry 
(define (parse-stack-push x) (set! parse-stack (cons x parse-stack))) Show source file in small font In xml-parser: Link from parse-stack-pop to it's cross reference table entry 
(define (parse-stack-pop) (if (not (parse-stack-empty?)) (set! parse-stack (cdr parse-stack)) (parse-error (string-append "Trying to pop an empty parse stack")))) Show source file in small font In xml-parser: Link from parse-stack-top to it's cross reference table entry 
(define (parse-stack-top) (if (not (parse-stack-empty?)) (car parse-stack) (parse-error (string-append "Trying to access the top of an empty parse stack")))) Show source file in small font In xml-parser: Link from parse-stack-empty? to it's cross reference table entry 
(define (parse-stack-empty?) (null? parse-stack)) Show source file in small font In xml-parser: Link from parse-stack-but-top to it's cross reference table entry 
(define (parse-stack-but-top) (if (not (parse-stack-empty?)) (cdr parse-stack) (parse-error (string-append "Trying to access the top of an empty parse stack")))) Show source file in small font In xml-parser: Link from parse-stack-top-and-pop to it's cross reference table entry 
(define (parse-stack-top-and-pop) (if (not (parse-stack-empty?)) (let ((res (car parse-stack))) (set! parse-stack (cdr parse-stack)) res ) (parse-error (string-append "Trying to access the top of an empty parse stack")))) Show source file in small font In xml-parser: Link from parse-stack-bottom to it's cross reference table entry 
(define (parse-stack-bottom) (if (not (parse-stack-empty?)) (last parse-stack) (parse-error (string-append "Trying to access the bottom of an empty parse stack")))) ; --------------------------------------------------------------------------------------------------- ; Tag structure and comment structure functions Show source file in small font In xml-parser: Link from make-tag-structure to it's cross reference table entry 1.3. The parse tree
(define (make-tag-structure kind tag-name attribute-property-list) (cons 'tag (cons kind (cons tag-name attribute-property-list)))) Show source file in small font In xml-parser: Link from kind-of-tag-structure to it's cross reference table entry 1.3. The parse tree
(define (kind-of-tag-structure tag-structure) (as-symbol (cadr tag-structure))) Show source file in small font In xml-parser: Link from tag-of-tag-structure to it's cross reference table entry 1.3. The parse tree
(define (tag-of-tag-structure tag-structure) (as-string (caddr tag-structure))) Show source file in small font In xml-parser: Link from attributes-of-tag-structure to it's cross reference table entry 1.3. The parse tree
(define (attributes-of-tag-structure tag-structure) (cdddr tag-structure)) ; Return the attribute value of attribute-key (a symbol) in attribute-list. ; If attribute does not exist, return #f.Show source file in small font In xml-parser: Link from attribute-value to it's cross reference table entry 
(define (attribute-value attribute-key attribute-list) (let ((a-list (propertylist-to-alist attribute-list))) (defaulted-get attribute-key a-list #f))) Show source file in small font In xml-parser: Link from make-comment-structure to it's cross reference table entry 
(define (make-comment-structure comment-string) (list 'comment comment-string)) Show source file in small font In xml-parser: Link from make-processing-instruction-structure to it's cross reference table entry 
(define (make-processing-instruction-structure pi-two-element-list) (cons 'processing-instruction pi-two-element-list)) Show source file in small font In xml-parser: Link from make-declaration-structure to it's cross reference table entry 
(define (make-declaration-structure kind val) (list 'declaration kind val)) Show source file in small font In xml-parser: Link from make-xml-declaration-structure to it's cross reference table entry 
(define (make-xml-declaration-structure attribute-plist) (list 'xml-declaration attribute-plist)) Show source file in small font In xml-parser: Link from comment-of-comment-structure to it's cross reference table entry 
(define (comment-of-comment-structure node) (cadr node)) Show source file in small font In xml-parser: Link from kind-of-declaration-structure to it's cross reference table entry 
(define (kind-of-declaration-structure node) (cadr node)) Show source file in small font In xml-parser: Link from value-of-declaration-structure to it's cross reference table entry 
(define (value-of-declaration-structure node) (caddr node)) Show source file in small font In xml-parser: Link from attributes-of-xml-declaration-structure to it's cross reference table entry 
(define (attributes-of-xml-declaration-structure node) (cadr node)) ; ------------------------------------------------------- ; Tag structure and comment structure predicates. Show source file in small font In xml-parser: Link from html-tree-node? to it's cross reference table entry 
(define (html-tree-node? x) (and (list? x) (>= (length x) 1) (eq? (car x) 'html-tree))) Show source file in small font In xml-parser: Link from xml-tree-node? to it's cross reference table entry 
(define (xml-tree-node? x) (and (list? x) (>= (length x) 1) (eq? (car x) 'xml-tree))) Show source file in small font In xml-parser: Link from start-node? to it's cross reference table entry 1.4. Parse tree functions
(define (start-node? x) (and (list? x) (>= (length x) 2) (eq? (car x) 'tag) (eq? (cadr x) 'start))) Show source file in small font In xml-parser: Link from start-end-node? to it's cross reference table entry 1.4. Parse tree functions
(define (start-end-node? x) (and (list? x) (>= (length x) 2) (eq? (car x) 'tag) (eq? (cadr x) 'start-end))) Show source file in small font In xml-parser: Link from comment-node? to it's cross reference table entry 
(define (comment-node? x) (and (list? x) (= (length x) 2) (eq? (car x) 'comment))) Show source file in small font In xml-parser: Link from processing-instruction-node? to it's cross reference table entry 
(define (processing-instruction-node? x) (and (list? x) (>= (length x) 2) (eq? (car x) 'processing-instruction))) Show source file in small font In xml-parser: Link from declaration-node? to it's cross reference table entry 
(define (declaration-node? x) (and (list? x) (= (length x) 3) (eq? (car x) 'declaration))) Show source file in small font In xml-parser: Link from xml-declaration-node? to it's cross reference table entry 
(define (xml-declaration-node? x) (and (list? x) (= (length x) 2) (eq? (car x) 'xml-declaration))) ; --------------------------------------------------------------------------------------------------- ; Parse tree functions ;; A global varible holding the latest produced parse treeShow source file in small font In xml-parser: Link from resulting-parse-tree to it's cross reference table entry 
(define resulting-parse-tree #f) Show source file in small font In xml-parser: Link from make-parse-tree to it's cross reference table entry 1.4. Parse tree functions
(define (make-parse-tree node subtree-list) (cons 'tree (cons node subtree-list))) Show source file in small font In xml-parser: Link from root-of-parse-tree to it's cross reference table entry 1.4. Parse tree functions
(define (root-of-parse-tree tree) (cadr tree)) Show source file in small font In xml-parser: Link from subtrees-of-parse-tree to it's cross reference table entry 1.4. Parse tree functions
(define (subtrees-of-parse-tree tree) (cddr tree)) Show source file in small font In xml-parser: Link from subtrees-of-xml-html-parse-tree to it's cross reference table entry 
(define subtrees-of-xml-html-parse-tree cdr) Show source file in small font In xml-parser: Link from parse-tree? to it's cross reference table entry 
(define (parse-tree? x) (and (pair? x) (eq? (car x) 'tree))) Show source file in small font In xml-parser: Link from textual-content-node? to it's cross reference table entry 
(define textual-content-node? string?) Show source file in small font In xml-parser: Link from terminal-node? to it's cross reference table entry 1.4. Parse tree functions
(define (terminal-node? tree) (or (string? tree) (and (list? tree) (= 2 (length tree)) (string? (cadr tree))) (and (list? tree) (= 2 (length tree)) (start-end-node? (cadr tree))) (start-end-node? tree) (comment-node? tree) (declaration-node? tree) (xml-declaration-node? tree))) Show source file in small font In xml-parser: Link from inner-node? to it's cross reference table entry 1.4. Parse tree functions
(define (inner-node? tree) (not (terminal-node? tree))) ; Return the node of a tree, which may be a contents string, a tag structure or a comment structure.Show source file in small font In xml-parser: Link from node-of-tree to it's cross reference table entry 
(define (node-of-tree tree) (cond ((terminal-node? tree) (cond ((string? tree) tree) ((start-end-node? tree) tree) ((comment-node? tree) tree) (else (root-of-parse-tree tree)))) ((inner-node? tree) (root-of-parse-tree tree)))) ; Return the node contents or tag name of the node (a symbol). An ad hoc function. ; The node may have been extracted ny node-of-tree.Show source file in small font In xml-parser: Link from node-info to it's cross reference table entry 
(define (node-info node) (cond ((text-contents-entry? node) (if (string? node) node (car node))) ((tag-entry? node) (as-symbol (tag-of-tag-structure node))) ((comment-entry? node) (comment-of-comment-structure node)) (else (error "node-info: Should not happen")))) ; Return the attributes of a node. If there is no attributes of the node, return the empty list.Show source file in small font In xml-parser: Link from node-attribute-info to it's cross reference table entry 
(define (node-attribute-info node) (cond ((text-contents-entry? node) '()) ((tag-entry? node) (attributes-of-tag-structure node)) (else (error "node-attribute-inf: Should not happen")))) ; --------------------------------------------------------------------------------------------------- ; Predicates on trees and tag structures: ; Most useful to make sense of stack entries. Can also be used to distinguish various kinds of subtrees ; of a tree from each other. Show source file in small font In xml-parser: Link from tag-entry? to it's cross reference table entry 
(define (tag-entry? x) (and (list? x) (>= (length x) 2) (eq? (car x) 'tag))) Show source file in small font In xml-parser: Link from comment-entry? to it's cross reference table entry 
(define (comment-entry? x) (and (list? x) (= (length x) 2) (eq? (car x) 'comment))) Show source file in small font In xml-parser: Link from declaration-entry? to it's cross reference table entry 
(define (declaration-entry? x) (and (list? x) (= (length x) 3) (eq? (car x) 'declaration))) Show source file in small font In xml-parser: Link from xml-declaration-entry? to it's cross reference table entry 
(define (xml-declaration-entry? x) (and (list? x) (= (length x) 2) (eq? (car x) 'xml-declaration))) Show source file in small font In xml-parser: Link from start-tag-entry? to it's cross reference table entry 2.4. Building the tree
(define (start-tag-entry? x) (and (tag-entry? x) (eq? (cadr x) 'start))) Show source file in small font In xml-parser: Link from start-end-tag-entry? to it's cross reference table entry 
(define (start-end-tag-entry? x) (and (tag-entry? x) (eq? (cadr x) 'start-end))) Show source file in small font In xml-parser: Link from tree-entry? to it's cross reference table entry 
(define (tree-entry? x) (and (list? x) (>= (length x) 2) (eq? (car x) 'tree))) Show source file in small font In xml-parser: Link from text-contents-entry? to it's cross reference table entry 2.4. Building the tree
(define (text-contents-entry? x) (or (string? x) (and (list? x) (= 1 (length x)) (string? (car x))))) Show source file in small font In xml-parser: Link from html-root? to it's cross reference table entry 
(define (html-root? x) (and (list? x) (>= (length x) 1) (eq? (car x) 'html-tree))) Show source file in small font In xml-parser: Link from xml-root? to it's cross reference table entry 
(define (xml-root? x) (and (list? x) (>= (length x) 1) (eq? (car x) 'xml-tree))) ; --------------------------------------------------------------------------------------------------- ; Substantial parse functions. Show source file in small font In xml-parser: Link from parse-xml-ip to it's cross reference table entry 1.1. The start
(define (parse-xml-ip) (if (not end-of-file?) (skip-white-space)) (cond ((and end-of-file? (not (parse-stack-empty?)) (>= (length parse-stack) 1)) (aggregate-final-parse-tree 'xml-tree)) ((and end-of-file? (parse-stack-empty?)) (make-final-parse-tree 'xml-tree '())) ((not end-of-file?) (let ((next-parse-tree (parse-xml-balanced-expression))) (parse-stack-push next-parse-tree) (parse-xml-ip))) (else (error (string-append "parse-xml-ip: strange parse error."))))) ; Skip all XML document prefix stuff, including comments. ; As of this version, it cannot skip an inline DTD (causes parse error).Show source file in small font In xml-parser: Link from skip-front-matters to it's cross reference table entry 1.2. Handling white space, comments, and front matters.
(define (skip-front-matters) (parse-message "Skipping front matter") (skip-white-space) (ensure-look-ahead 2) (cond ((match-look-ahead? "<?") (read-a-string 2) (skip-until-string "?>" #t) (skip-front-matters)) ((match-look-ahead? "<!") ; A linked program source marker to section 1.2:
'Handling white space, comments, and front matters.'
Mark char: a (let ((comment (collect-balanced-until (lambda (ch) (eqv? ch #\<)) (lambda (ch) (eqv? ch #\>))))) 'do-nothing) (skip-front-matters)))) Show source file in small font In xml-parser: Link from parse-xml-balanced-expression to it's cross reference table entry 1.1. The start 2.1. Overall parsing functions 2.2. Parsing a balanced XML expression. 2.3. Parsing the contents after a start tag 2.4. Building the tree 3.1. Important differences
(define (parse-xml-balanced-expression) (parse-message "Parsing balanced expression.") (if (not end-of-file?) (skip-white-space)) (let ((what (what-is-ahead))) ; A linked program source marker to section 2.2:
'Parsing a balanced XML expression.'
Mark char: a - looks ahead - does not read (cond ((eq? what 'tag) (let* ((tag (read-tag)) ; A linked program source marker to section 2.2:
'Parsing a balanced XML expression.'
Mark char: b (kind (kind-of-tag-structure tag))) (cond ((eq? kind 'start) ; A linked program source marker to section 2.2:
'Parsing a balanced XML expression.'
Mark char: c (parse-stack-push tag) (read-and-push-subtrees-until-end-tag (tag-of-tag-structure tag)) ; consumes the end tag too (build-tree-from-stack (tag-of-tag-structure tag)) ; return inner node ) ((eq? kind 'start-end) tag ; return terminal node ) ((eq? kind 'end) (parse-error "end tag encountered without matching start tag: " (as-string tag))) (else (parse-error "parse-xml-balanced-expression: unknown kind of tag")) ) ) ) ((eq? what 'contents-string) (let ((contents-string (read-contents-string))) contents-string)) ((eq? what 'comment) (let ((comment-string (read-comment))) (make-comment-structure comment-string))) ((eq? what 'processing-instruction) (let ((pi-structure (read-processing-instruction))) (make-processing-instruction-structure pi-structure))) ((eq? what 'declaration) (let ((declaration-structure (read-declaration))) (make-declaration-structure (kind-of-declaration-structure declaration-structure) (value-of-declaration-structure declaration-structure)))) ((eq? what 'xml-declaration) (let ((attributes (read-xml-declaration))) (make-xml-declaration-structure attributes))) (else (parse-error "parse-xml-balanced-expression: Parse problem"))))) Show source file in small font In xml-parser: Link from read-comment to it's cross reference table entry 
(define (read-comment) (parse-message "Reading comment") (skip-white-space) (ensure-look-ahead 4) (if (match-look-ahead? "<!--") (begin (read-a-string 4) (skip-white-space) (let ((res (collect-until-string "--"))) (read-a-string 2) (skip-white-space) (ensure-look-ahead 1) (if (match-look-ahead? ">") (read-a-char) (parse-error "End of comment char '>' expected")) res)) (parse-error "Beginning of comment '<!--' expected"))) Show source file in small font In xml-parser: Link from read-processing-instruction to it's cross reference table entry 
(define (read-processing-instruction) (parse-message "Reading processing instruction") (skip-white-space) (ensure-look-ahead 4) (if (match-look-ahead? "<?") (begin (read-a-string 2) (skip-white-space) (let ((pi-target (collect-until is-white-space?))) (skip-white-space) (let ((pi-string (collect-until-string "?>"))) (read-a-string 2) (skip-white-space) (list pi-target pi-string)))) (parse-error "Beginning of processing instruction <? expected"))) Show source file in small font In xml-parser: Link from read-declaration to it's cross reference table entry 
(define (read-declaration) ; such as doctype. A primitive version. (parse-message "Reading declaration") (skip-white-space) (ensure-look-ahead 2) (if (match-look-ahead? "<!") (begin (read-a-string 2) (skip-white-space) (let ((kind (collect-until is-white-space?))) (skip-white-space) (let ((val (collect-until-string ">"))) (read-a-char) ; > (make-declaration-structure kind val)))) (parse-error "Beginning of decaration '<!' expected"))) ; Return a the attribute list (property list) of an xml-declaration-structure. ; This procedures reads over the entire <? ... ?> structureShow source file in small font In xml-parser: Link from read-xml-declaration to it's cross reference table entry 
(define (read-xml-declaration) ; <? ... ?> (parse-message "Reading xml declaration") (skip-white-space) (ensure-look-ahead 5) (if (match-look-ahead? "<?xml") (begin (read-a-string 5) (skip-white-space) (let ((attributes (read-tag-attributes))) (skip-white-space) (ensure-look-ahead 2) (if (match-look-ahead? "?>") (read-a-string 2) (parse-error "?> expected")) attributes)) (parse-error "Beginning of declaration '<?xml' expected"))) Show source file in small font In xml-parser: Link from what-is-ahead to it's cross reference table entry 2.1. Overall parsing functions
(define (what-is-ahead) (ensure-look-ahead 5) (cond ((match-look-ahead? "<!--") 'comment) ((match-look-ahead? "<!") 'declaration) ; right concept? ((match-look-ahead? "<?xml") 'xml-declaration) ; right concept? ((match-look-ahead? "<?") 'processing-instruction) ((match-look-ahead? "<") 'tag) (else 'contents-string))) Show source file in small font In xml-parser: Link from read-contents-string to it's cross reference table entry 
(define (read-contents-string) (let ((cont-string (collect-until (lambda (ch) (eqv? ch #\<))))) (if (white-space-preserving-context? parse-stack) cont-string (remove-redundant-white-space cont-string)))) ; In a white space preserving context, such as HTML pre, strings are not rebroken. ; In XML we should probably chose always to preserve white space.Show source file in small font In xml-parser: Link from white-space-preserving-context? to it's cross reference table entry 
(define (white-space-preserving-context? parse-stack) #f) ; remove extra white space from str by returning a truncated string. Turn extra white space in spaces. A pure function.Show source file in small font In xml-parser: Link from remove-redundant-white-space to it's cross reference table entry 
(define (remove-redundant-white-space str) (remove-redundant-white-space-1 str "" 0 (string-length str) #f)) Show source file in small font In xml-parser: Link from remove-redundant-white-space-1 to it's cross reference table entry 
(define (remove-redundant-white-space-1 str res i lgt removing) (cond ((= i lgt) res) ((and removing (is-white-space? (string-ref str i))) (remove-redundant-white-space-1 str res (+ i 1) lgt #t)) ((and removing (not (is-white-space? (string-ref str i)))) (remove-redundant-white-space-1 str (string-append res (as-string (string-ref str i))) (+ i 1) lgt #f)) ((and (not removing) (is-white-space? (string-ref str i))) (remove-redundant-white-space-1 str (string-append res (as-string #\space)) (+ i 1) lgt #t)) ((and (not removing) (not (is-white-space? (string-ref str i)))) (remove-redundant-white-space-1 str (string-append res (as-string (string-ref str i))) (+ i 1) lgt #f)) (else (error "remove-redundant-white-space-1: should not happen")))) Show source file in small font In xml-parser: Link from read-and-push-subtrees-until-end-tag to it's cross reference table entry 2.2. Parsing a balanced XML expression. 2.3. Parsing the contents after a start tag 3.1. Important differences
(define (read-and-push-subtrees-until-end-tag end-tag-name) (skip-white-space) (let* ((n (+ (string-length end-tag-name) 3)) ; A linked program source marker to section 2.3:
'Parsing the contents after a start tag'
Mark char: a (end-tag-string (string-append (as-string #\<) (as-string #\/) end-tag-name (as-string #\>))) ) (ensure-look-ahead n) (if (match-look-ahead? end-tag-string) ; A linked program source marker to section 2.3:
'Parsing the contents after a start tag'
Mark char: b (begin (read-a-string n) ; finish (skip-white-space)) (let ((subtree (parse-xml-balanced-expression))) ; A linked program source marker to section 2.3:
'Parsing the contents after a start tag'
Mark char: c (parse-stack-push subtree) (skip-white-space) (read-and-push-subtrees-until-end-tag end-tag-name) ; tail recursive parsing of contents )))) Show source file in small font In xml-parser: Link from build-tree-from-stack to it's cross reference table entry 2.2. Parsing a balanced XML expression. 2.4. Building the tree
(define (build-tree-from-stack end-tag-name) (build-tree-from-stack-1 end-tag-name '())) Show source file in small font In xml-parser: Link from build-tree-from-stack-1 to it's cross reference table entry 2.4. Building the tree
(define (build-tree-from-stack-1 tag-name tree-list) (let ((top (parse-stack-top-and-pop))) (if (and (start-tag-entry? top) (matches-stack-entry top tag-name)) (make-parse-tree top tree-list) (build-tree-from-stack-1 tag-name (cons top tree-list))))) Show source file in small font In xml-parser: Link from matches-stack-entry to it's cross reference table entry 2.4. Building the tree
(define (matches-stack-entry top-tag-structure tag-name) (equal? (downcase-string (tag-of-tag-structure top-tag-structure)) (downcase-string tag-name))) ; We know that we are just in front of a tag. Return a tag structure.Show source file in small font In xml-parser: Link from read-tag to it's cross reference table entry 2.1. Overall parsing functions 2.2. Parsing a balanced XML expression.
(define (read-tag) (parse-message "Reading tag") (skip-white-space) (ensure-look-ahead 1) (if (match-look-ahead? "<") (read-a-char) (parse-error "'<' expected")) (ensure-look-ahead 1) (if (match-look-ahead? "/") ; end tag (begin ; A linked program source marker to section 2.1:
'Overall parsing functions'
Mark char: a (read-a-char) (let ((tag-name (collect-until (lambda (ch) (or (eqv? ch #\>) (is-white-space? ch)))))) (ensure-look-ahead 1) (if (match-look-ahead? ">") (read-a-char) ; > (parse-error "An end tag must not contain anything after the tag name")) (parse-message " " "End: " tag-name) (make-tag-structure 'end tag-name '()))) (let ((tag-name (collect-until (lambda (ch) (or (eqv? ch #\>) (eqv? ch #\/) (is-white-space? ch)))))) (skip-white-space) (ensure-look-ahead 2) ; A linked program source marker to section 2.1:
'Overall parsing functions'
Mark char: b (cond ((and (>= (string-length (max-look-ahead-prefix)) 2) (match-look-ahead? "/>")) ; A linked program source marker to section 2.1:
'Overall parsing functions'
Mark char: c - weird (read-a-string 2) (make-tag-structure 'start-end tag-name '())) ((and (>= (string-length (max-look-ahead-prefix)) 1) (match-look-ahead? ">")) ; A linked program source marker to section 2.1:
'Overall parsing functions'
Mark char: d (read-a-char) (make-tag-structure 'start tag-name '())) (else (let ((attribute-list (read-tag-attributes))) ; A linked program source marker to section 2.1:
'Overall parsing functions'
Mark char: e - white space processes at this point (ensure-look-ahead 2) (cond ((and (>= (string-length (max-look-ahead-prefix)) 2) (match-look-ahead? "/>")) (read-a-string 2) (parse-message " " "Start end: " tag-name ". Attributes: " (as-string attribute-list)) (make-tag-structure 'start-end tag-name attribute-list)) ((and (>= (string-length (max-look-ahead-prefix)) 1) (match-look-ahead? ">")) (read-a-char) (parse-message " " "Start: " tag-name ". Attributes: " (as-string attribute-list)) (make-tag-structure 'start tag-name attribute-list)) (else (parse-error "read-tag: end of tag expected")))) ))))) ; Return attribute value pairs as a property list ; Expect that we are at the first character the first attribute nameShow source file in small font In xml-parser: Link from read-tag-attributes to it's cross reference table entry 2.1. Overall parsing functions
(define (read-tag-attributes) (skip-white-space) (let ((attr-val (read-attribute-value-pair))) ; cons-pair or #f. (skip-white-space) (if attr-val (cons (car attr-val) (cons (cdr attr-val) (read-tag-attributes) ; recursive reading ) ) '()))) ; (define (read-attribute-value-pair) ; (ensure-look-ahead 2) ; (if (or (match-look-ahead? "/>") (match-look-ahead? "?>") (match-look-ahead? ">")) ; #f ; (let* ((attr-name (collect-until (lambda (ch) (or (eqv? ch #\=) (is-white-space? ch)))))) ; (skip-white-space) ; (ensure-look-ahead 1) ; (if (match-look-ahead? "=") ; (begin ; (read-a-char) ; (skip-white-space)) ; (parse-error "= expected after attribute value")) ; (ensure-look-ahead 1) ; (if (match-look-ahead? """) ; (begin ; (read-a-char) ; reading first " ; (let ((value (collect-until (char-predicate #")))) ; (read-a-char) ; reading second " ; (skip-white-space) ; (cons (as-symbol attr-name) value))) ; (parse-error "Attribute value in string quotes expected"))))) ; read a single attribute value pair from the input port and return it is a cons pair. ; If no more attributes exists (we see end of tag characters) return #f.Show source file in small font In xml-parser: Link from read-attribute-value-pair to it's cross reference table entry 2.1. Overall parsing functions
(define (read-attribute-value-pair) (ensure-look-ahead 2) (if (or (match-look-ahead? "/>") (match-look-ahead? "?>") (match-look-ahead? ">")) #f (let* ((attr-name (collect-until (lambda (ch) (or (eqv? ch #\=) (eqv? ch #\>) (is-white-space? ch)))))) (skip-white-space) (ensure-look-ahead 1) (cond ((match-look-ahead? "=") (read-a-char) (skip-white-space) (ensure-look-ahead 1) (cond ((match-look-ahead? "\"") (read-a-char) ; reading first " (let ((value (collect-until (char-predicate #\")))) (read-a-char) ; reading second " (skip-white-space) (cons (as-symbol attr-name) value))) ((match-look-ahead? "'") (read-a-char) ; reading first ' (let ((value (collect-until (char-predicate #\')))) (read-a-char) ; reading second ' (skip-white-space) (cons (as-symbol attr-name) value))) (else (let ((value (collect-until (lambda (ch) (or (is-white-space? ch) (eqv? ch #\>)))))) (skip-white-space) (cons (as-symbol attr-name) value))))) (else (cons (as-symbol attr-name) (as-string attr-name)) ; boolean attribute ))))) ; aggregate remaining stack entries as subtrees of a kind node. kind is a symbol, such ; as html-tree or xml-treeShow source file in small font In xml-parser: Link from aggregate-final-parse-tree to it's cross reference table entry 
(define (aggregate-final-parse-tree kind) (make-final-parse-tree kind (reverse (shallow-list-copy parse-stack)))) Show source file in small font In xml-parser: Link from make-final-parse-tree to it's cross reference table entry 
(define (make-final-parse-tree kind subtrees) (cons kind subtrees)) ; Assume as a pre-condition that lst is a proper listShow source file in small font In xml-parser: Link from shallow-list-copy to it's cross reference table entry 
(define (shallow-list-copy lst) (cond ((null? lst) '()) (else (cons (car lst) (shallow-list-copy (cdr lst)))))) ; ---------------------------------------------------------------------------------------------------;;; Utility parser functions. ;;; The functions in this section are all miscelaneous and utility functions of the parser. ;; Traverse the parse tree, tree, and return a list of result-transformed nodes ;; that satisfy the node-interesting? predicate in the parse tree. ;; In other words, apply the node-interesting? predicate to all subtrees of the tree during the traversal, and return ;; the result-transformed list of subtrees. ;; Both the functions node-interesting? and result-transformer are applied on trees and subtrees. ;; .example (traverse-and-collect-from-parse-tree resulting-parse-tree (is-tag-of-kind? 'a) parse-tree-to-laml-expression) Show source file in small font In xml-parser: Link from traverse-and-collect-from-parse-tree to it's cross reference table entry 
(define (traverse-and-collect-from-parse-tree tree node-interesting? result-transformer) (cond ((or (html-tree-node? tree) (xml-tree-node? tree)) (let ((subtree-results (map (lambda (subtr) (traverse-and-collect-from-parse-tree subtr node-interesting? result-transformer)) (cdr tree)))) (flatten subtree-results))) ((and (terminal-node? tree) (node-interesting? tree)) (list (result-transformer tree))) ((and (terminal-node? tree) (not (node-interesting? tree))) '()) ((inner-node? tree) (let ((subtree-results (map (lambda (subtr) (traverse-and-collect-from-parse-tree subtr node-interesting? result-transformer)) (subtrees-of-parse-tree tree)))) (if (node-interesting? tree) (cons (result-transformer tree) (flatten subtree-results)) (flatten subtree-results)))))) ;; Traverse the parse tree, tree, and return the list all attribute values of the attribute attr-key found ;; in the tree. ;; .example (collect-attributes-in-tree tree 'href) Show source file in small font In xml-parser: Link from collect-attributes-in-tree to it's cross reference table entry 
(define (collect-attributes-in-tree tree attr-key) (filter (lambda (x) (if x #t #f)) (traverse-and-collect-from-parse-tree tree (lambda (tr) (tag-entry? (node-of-tree tr))) (lambda (tr) (attribute-value attr-key (attributes-of-tag-structure (node-of-tree tr))))))) ;; Return a predicate which tests whether a subtree or node is of tag-kind (a symbol or string). ;; This function is a useful second parameter to traverse-and-collect-from-parse-tree. ;; .internal-references "related function" "traverse-and-collect-from-parse-tree"Show source file in small font In xml-parser: Link from is-tag-of-kind? to it's cross reference table entry 
(define (is-tag-of-kind? tag-kind) (lambda (x) (or (and (tag-entry? x) (equal? (downcase-string (as-string tag-kind)) (downcase-string (as-string (tag-of-tag-structure x))))) (and (tree-entry? x) (tag-entry? (node-of-tree x)) (equal? (downcase-string (as-string tag-kind)) (downcase-string (as-string (tag-of-tag-structure (node-of-tree x))))))))) ;; Display parser status in case of error in the parse process.Show source file in small font In xml-parser: Link from parser-status to it's cross reference table entry 
(define (parser-status) (display (stack-status)) (newline) (display (input-status)) ) Show source file in small font In xml-parser: Link from stack-status to it's cross reference table entry 
(define (stack-status) (string-append "THE PARSE STACK: " (as-string #\newline) (stack-status-1 parse-stack))) Show source file in small font In xml-parser: Link from stack-status-1 to it's cross reference table entry 
(define (stack-status-1 lst) (if (null? lst) "" (let ((top (car lst))) (string-append (cond ((text-contents-entry? top) (as-string (node-info top))) ((tag-entry? top) (as-string (node-info top))) ((tree-entry? top) (as-string (node-info (node-of-tree top)))) (else "???")) (as-string #\newline) (stack-status-1 (cdr lst)))))) Show source file in small font In xml-parser: Link from input-status to it's cross reference table entry 
(define (input-status) (ensure-look-ahead 100) (string-append "PREFIX OF REMAINING INPUT: " (as-string #\newline) (as-string #\") (max-look-ahead-prefix) (as-string #\") (as-string #\newline))) ; ------------------------------------------------------------------------------------------------------------------------ ; XML pretty printing. ; In reality this is mostly generic XML and HTML pretty printing. ; ------------------------------------------------------------------;;; Top level XML pretty printing functions. ;; Pretty prints the XML parse tree (Lisp file) in in-file-path. ;; Outputs the pretty printed result in out-file-path, ;; which defaults to in-file-path if not explicitly passed. ;; .form (pretty-print-xml-parse-tree-file in-file-path [out-file-path]) ;; .misc For XML-in-LAML ASTs use pretty-render-to-output-port instead of this functionShow source file in small font In xml-parser: Link from pretty-print-xml-parse-tree-file to it's cross reference table entry 
(define (pretty-print-xml-parse-tree-file in-file-path . optional-parameters) (let ((out-file-path (optional-parameter 1 optional-parameters in-file-path))) (let ((parse-tree (file-read in-file-path))) (write-text-file (pretty-print-xml-parse-tree parse-tree) out-file-path)))) ;; Pretty prints a HTML parse tree, and return the result as a string. ;; .misc For XML-in-LAML ASTs use pretty-xml-render instead of this functionShow source file in small font In xml-parser: Link from pretty-print-xml-parse-tree to it's cross reference table entry 
(define (pretty-print-xml-parse-tree parse-tree) (set! res '()) (pretty-print-xml-html-parse-tree-1 parse-tree 0 #f) (linearize-pp-result (reverse res))) ; ------------------------------------------------------------------;;; Variables that control the pretty printing. ;;; These variables apply for both HTML and XML. ;; An integer which gives the level of indentationShow source file in small font In xml-parser: Link from indentation-delta to it's cross reference table entry 
(define indentation-delta 3) ;; A boolean which controls the application of single line pretty printing. ;; If true, the pretty printer will pretty print short list forms on a single lineShow source file in small font In xml-parser: Link from use-single-lining to it's cross reference table entry 
(define use-single-lining #t) ;; An integer that expresses the preferred maximum column widthShow source file in small font In xml-parser: Link from prefered-maximum-width to it's cross reference table entry 
(define prefered-maximum-width 90) ; ------------------------------------------------------------------- ; The internal result list Show source file in small font In xml-parser: Link from res to it's cross reference table entry 
(define res '()) Show source file in small font In xml-parser: Link from add-to-res to it's cross reference table entry 
(define (add-to-res x) (set! res (cons x res))) Show source file in small font In xml-parser: Link from add-list-to-res to it's cross reference table entry 
(define (add-list-to-res lst) (for-each (lambda (el) (add-to-res el)) lst)) Show source file in small font In xml-parser: Link from remove-from-res to it's cross reference table entry 
(define (remove-from-res i) (if (> i 0) (begin (set! res (cdr res)) (remove-from-res (- i 1))))) Show source file in small font In xml-parser: Link from linearize-pp-result to it's cross reference table entry 
(define (linearize-pp-result lst) (apply string-append (map as-string lst))) ; In response to Per Madsen's request - but not a real solution. ; space after a tag belongs to the word just after the tag. ; (define (add-white-space single-lining?) ; (if (not single-lining?) ; (if (eq? laml-platform 'windows) ; pc end of line convention ; (begin (add-to-res #\newline)) ; (add-to-res #\return) ; (add-to-res #\newline)))) ; Is the last element on res ended with white space? ; Start tag strings will always give #tShow source file in small font In xml-parser: Link from trailing-whitespace-on-res? to it's cross reference table entry 
(define (trailing-whitespace-on-res?) (if (null? res) #f (let ((lst-el (car res))) (cond ((string? lst-el) (let ((lgt (string-length lst-el))) (or (eqv? #\space (string-ref lst-el (- lgt 1))) (start-tag-string? lst-el) ) )) ((char? lst-el) (or (eqv? lst-el #\space) (eqv? lst-el #\newline))) (else #f))))) ; precond: x is a stringShow source file in small font In xml-parser: Link from start-tag-string? to it's cross reference table entry 
(define (start-tag-string? x) (and (> (string-length x) 1) (eqv? (string-ref x 0) #\<) (not (eqv? (string-ref x 1) #\/)))) Show source file in small font In xml-parser: Link from trailing-whitespace-pending to it's cross reference table entry 
(define trailing-whitespace-pending #f) Show source file in small font In xml-parser: Link from add-white-space to it's cross reference table entry 
(define (add-white-space single-lining?) (if #t ; (trailing-whitespace-on-res?) (if (not single-lining?) (add-to-res #\newline)) (set! trailing-whitespace-pending #t))) Show source file in small font In xml-parser: Link from add-indentation to it's cross reference table entry 
(define (add-indentation i) (if #t ; (trailing-whitespace-on-res?) (add-to-res (indentation i)))) ; ------------------------------------------------------------------- Show source file in small font In xml-parser: Link from indentation to it's cross reference table entry 
(define (indentation n) (make-string n #\space)) Show source file in small font In xml-parser: Link from single-liner-form? to it's cross reference table entry 
(define (single-liner-form? x start-col max-width) (if use-single-lining (let ((width (meassure-html-xml-form x))) (<= (+ width start-col) max-width)) #f)) ; ------------------------------------------------------------------------------------------------------------ ; The substantial and central pretty printing function. This function ; support both XML and HTML. Show source file in small font In xml-parser: Link from problem-form to it's cross reference table entry 
(define problem-form #f) Show source file in small font In xml-parser: Link from pretty-print-xml-html-parse-tree-1 to it's cross reference table entry 
(define (pretty-print-xml-html-parse-tree-1 form start-col single-lining?) (cond ((html-root? form) (add-subtrees-to-res (cdr form) start-col single-lining?)) ((xml-root? form) (add-subtrees-to-res (cdr form) start-col single-lining?)) ((tree-entry? form) (if (single-liner-form? form start-col prefered-maximum-width) (begin (pretty-print-start-tag (root-of-parse-tree form) start-col #t) (add-white-space #t) (add-subtrees-to-res (subtrees-of-parse-tree form) (+ start-col indentation-delta) #t) (pretty-print-end-tag (root-of-parse-tree form) start-col #t)) (begin (pretty-print-start-tag (root-of-parse-tree form) start-col single-lining?) (add-white-space single-lining?) (add-subtrees-to-res (subtrees-of-parse-tree form) (+ start-col indentation-delta) single-lining?) (if (not single-lining?) (add-indentation start-col)) (pretty-print-end-tag (root-of-parse-tree form) start-col single-lining?)))) ((text-contents-entry? form) (add-list-to-res (break-long-string (node-info form) start-col (max 10 (- prefered-maximum-width start-col))))) ; ((text-contents-entry? form) ; (add-to-res (node-info form))) ((comment-entry? form) (pretty-print-comment (comment-of-comment-structure form))) ((declaration-entry? form) (pretty-print-declaration (kind-of-declaration-structure form) (value-of-declaration-structure form))) ((xml-declaration-entry? form) (pretty-print-xml-declaration (attributes-of-xml-declaration-structure form))) ((start-end-tag-entry? form) (pretty-print-empty-tag form start-col single-lining?)) (else (begin (set! problem-form form) (error (string-append "pretty-print-xml-html-parse-tree-1: Unknown structure encountered: " (as-string form))))))) ; Break string into a list of substrings with interleaved indentation strings ; Handles the breaking and pretty printing of long textual pieces.Show source file in small font In xml-parser: Link from break-long-string to it's cross reference table entry 
(define (break-long-string str indentation max-length) (if (and (string? str) (not (empty-string? str))) (let* ((str1 (transliterate str #\newline (as-string #\space))) (indentation-string (string-append (as-string #\newline) (make-string indentation #\space))) (broken-strings (break-long-string-1 str1 (string-length str) 0 max-length '())) (lgt (length broken-strings)) ) (merge-lists-simple broken-strings (make-list (- lgt 1) indentation-string))) (list str))) Show source file in small font In xml-parser: Link from break-long-string-1 to it's cross reference table entry 
(define (break-long-string-1 str str-lgt i max-length res) (cond ((>= i str-lgt) (reverse (cons str res))) ((and (< i str-lgt) (member (string-ref str i) white-space-char-list)) (if (> i max-length) (let ((first (substring str 0 i)) (rest (substring str (+ i 1) str-lgt))) (break-long-string-1 rest (string-length rest) 1 max-length (cons first res))) (break-long-string-1 str str-lgt (+ i 1) max-length res))) (else (break-long-string-1 str str-lgt (+ i 1) max-length res)))) Show source file in small font In xml-parser: Link from add-subtrees-to-res to it's cross reference table entry 
(define (add-subtrees-to-res subtree-list start-col single-lining?) (for-each (lambda (tree) (if (not single-lining?) (add-indentation start-col)) (pretty-print-xml-html-parse-tree-1 tree start-col single-lining?) (add-white-space single-lining?)) subtree-list)) Show source file in small font In xml-parser: Link from pretty-print-start-tag to it's cross reference table entry 
(define (pretty-print-start-tag tag-structure start-col single-lining?) (let ((tag-name (tag-of-tag-structure tag-structure)) (attributes (attributes-of-tag-structure tag-structure))) (if (null? attributes) (add-to-res (string-append (as-string #\<) (as-string tag-name) (as-string #\>))) (begin (add-to-res (string-append (as-string #\<) (as-string tag-name))) (add-to-res #\space) (add-attributes-to-res attributes (+ start-col 2 (string-length tag-name)) single-lining?) (remove-from-res 1) ; last space before greater than char (add-to-res #\>))))) ; attributes is a property listShow source file in small font In xml-parser: Link from add-attributes-to-res to it's cross reference table entry 
(define (add-attributes-to-res attributes start-col single-lining?) (cond ((and (not (null? attributes)) (>= (length attributes) 2)) (let ((key (car attributes)) (val (cadr attributes)) (res-attributes (cddr attributes))) (add-single-attribute-to-res key val start-col single-lining?) (add-attributes-to-res (cddr attributes) start-col single-lining?))) ((and (not (null? attributes)) (< (length attributes) 2)) (error (string-append "add-attributes-to-res: malformed attribute list: " (as-string attributes)))))) Show source file in small font In xml-parser: Link from add-single-attribute-to-res to it's cross reference table entry 
(define (add-single-attribute-to-res key val start-col single-lining?) (add-to-res key) (add-to-res "=") (add-to-res (string-it val)) (add-to-res #\space)) Show source file in small font In xml-parser: Link from pretty-print-empty-tag to it's cross reference table entry 
(define (pretty-print-empty-tag tag-structure start-col single-lining?) (let ((tag-name (tag-of-tag-structure tag-structure)) (attributes (attributes-of-tag-structure tag-structure))) (if (null? attributes) (add-to-res (string-append (as-string #\<) (as-string tag-name) (as-string #\/) (as-string #\>))) (begin (add-to-res (string-append (as-string #\<) (as-string tag-name))) (add-to-res #\space) (add-attributes-to-res attributes (+ start-col 2 (string-length tag-name)) single-lining?) (remove-from-res 1) ; last space before greater than char (add-to-res (string-append (as-string #\/) (as-string #\>) )))))) Show source file in small font In xml-parser: Link from pretty-print-end-tag to it's cross reference table entry 
(define (pretty-print-end-tag tag-structure start-col single-lining?) (let* ((tag-name (tag-of-tag-structure tag-structure)) (end-tag (string-append (as-string #\<) (as-string #\/) (as-string tag-name) (as-string #\>)))) (add-to-res end-tag))) Show source file in small font In xml-parser: Link from pretty-print-comment to it's cross reference table entry 
(define (pretty-print-comment comment-string) (add-to-res "<!--") (add-to-res #\space) (add-to-res comment-string) (add-to-res "-->")) Show source file in small font In xml-parser: Link from pretty-print-declaration to it's cross reference table entry 
(define (pretty-print-declaration kind value) (add-to-res "<!") (add-to-res (as-string kind)) (add-to-res #\space) (add-to-res value) (add-to-res ">")) Show source file in small font In xml-parser: Link from pretty-print-xml-declaration to it's cross reference table entry 
(define (pretty-print-xml-declaration attributes) (add-to-res "<?xml") (add-to-res #\space) (let ((dummy 0)) (add-attributes-to-res attributes dummy #f)) (add-to-res #\space) (add-to-res "?>")) ; ------------------------------------------------------------------ ; Html and xml meassuring Show source file in small font In xml-parser: Link from meassure-html-xml-form to it's cross reference table entry 
(define (meassure-html-xml-form form) (cond ((html-root? form) (accumulate-right + 0 (map meassure-html-xml-form (cdr form)))) ((tree-entry? form) (+ (meassure-html-xml-form (root-of-parse-tree form)) ; both start only 1 (accumulate-right + 0 (map meassure-html-xml-form (subtrees-of-parse-tree form))) (string-length (as-string (tag-of-tag-structure (root-of-parse-tree form)))) 3)) ((text-contents-entry? form) (string-length (node-info form))) ((comment-entry? form) (+ 7 (string-length (comment-of-comment-structure form)))) ((declaration-entry? form) (+ 3 (string-length (as-string (kind-of-declaration-structure form))) (string-length (value-of-declaration-structure form)))) ((or (start-end-tag-entry? form) (start-tag-entry? form)) (meassure-tag-structure form)) (else (error (string-append "meassure-html-xml-form: Unknown structure encountered: " (as-string form)))))) Show source file in small font In xml-parser: Link from meassure-tag-structure to it's cross reference table entry 
(define (meassure-tag-structure tag-structure) (let ((tag-name (tag-of-tag-structure tag-structure)) (attributes (attributes-of-tag-structure tag-structure))) (+ (string-length (as-string tag-name)) 3 (meassure-attributes attributes)))) Show source file in small font In xml-parser: Link from meassure-attributes to it's cross reference table entry 
(define (meassure-attributes attributes) (accumulate-right + 0 (map meassure-attribute (propertylist-to-alist attributes)))) Show source file in small font In xml-parser: Link from meassure-attribute to it's cross reference table entry 
(define (meassure-attribute attribute) (let ((key (car attribute)) (val (cdr attribute))) (+ (string-length (as-string key)) 1 (string-length (as-string val))))) ; ------------------------------------------------------------------ ;;; Parse tree conversions. ;;; In this section we provide a number of conversion functions that work on parse trees. ;; Transform an XML or HTML parse tree to a similar surface LAML expression on output-file. ;; This function accept parse tree rooted by the symbols html-tree, xml-tree, as well the symbol tree. ;; .parameter tree an XML or HTML parse tree ;; .parameter output-file The name of the file on which to write the LAML expression. Can be full path. Must include extension. ;; .reference "laml.scm function" "html-to-laml" "../../../man/laml.html#html-to-laml" ;; .misc When the resulting file is LAML processed, the LAML file will write the a LAML file, say f.laml, to f.html in the same directory as the laml file.Show source file in small font In xml-parser: Link from parse-tree-to-laml to it's cross reference table entry 
(define (parse-tree-to-laml tree output-file) (let ((prefix (string-append "(load (string-append laml-dir \"laml.scm\")) " "(laml-style \"simple-html4.0-loose\")" "(write-text-file ")) (html-document (parse-tree-2-laml-string tree)) (suffix " (string-append (startup-directory) laml-source-file-name-without-extension \".html\"))") ) (write-text-file (string-append prefix html-document suffix) output-file))) ; Given a parse tree return a string with a LAML version of the tree.Show source file in small font In xml-parser: Link from parse-tree-2-laml-string to it's cross reference table entry 
(define (parse-tree-2-laml-string tree) (if (parse-tree? tree) (parse-tree-2-laml-string-1 tree) (let ((the-tree (find-in-list (lambda (x) (tree-entry? x)) (subtrees-of-xml-html-parse-tree tree)))) (if the-tree (parse-tree-2-laml-string-1 the-tree) (laml-error "parse-tree-2-laml-string: Cannot find tree structure..."))))) ; The function doing the real workShow source file in small font In xml-parser: Link from parse-tree-2-laml-string-1 to it's cross reference table entry 
(define (parse-tree-2-laml-string-1 tree) (cond ((inner-node? tree) (cond ((html-tree-node? tree) (list-to-string (map parse-tree-2-laml-string-1 (cdr tree)) " ")) (else (let ((root (root-of-parse-tree tree)) (subtrees (subtrees-of-parse-tree tree))) (string-append "(" (downcase-string (tag-of-tag-structure root)) " " (lamlify-attributes-string (attributes-of-tag-structure root)) (list-to-string (map parse-tree-2-laml-string-1 subtrees) " ") ")"))))) ((terminal-node? tree) (cond ((text-contents-entry? tree) (string-it (string-protect (node-info tree)))) ((start-end-node? tree) (string-append "(" (downcase-string (tag-of-tag-structure tree)) " " (lamlify-attributes-string (attributes-of-tag-structure tree)) ")")) (else "") ; drops comments and declarations from the output. ) ) (error "parse-tree-2-laml: Should not happen 2"))) Show source file in small font In xml-parser: Link from lamlify-attributes-string to it's cross reference table entry 
(define (lamlify-attributes-string attr-list) (cond ((null? attr-list) "") (else (let ((key (downcase-string (as-string (car attr-list)))) (val (cadr attr-list))) (string-append "'" key " " (string-it val) " " (lamlify-attributes-string (cddr attr-list))))))) ; Protect internal string quotes in str with backslashes.Show source file in small font In xml-parser: Link from string-protect to it's cross reference table entry 
(define (string-protect str) (replace-string str (as-string #\") (string-append (as-string #\\) (as-string #\")))) ; Transform an XML or HTML parse tree to a LAML surface expressions (in terms of a Scheme list structure). ; This function is similar to parse-tree-to-laml which delivers a textual result ; (a string) on an output file. ; Feb 2002: There are some problems with this function. Removed from public interface.Show source file in small font In xml-parser: Link from parse-tree-to-laml-expression to it's cross reference table entry 
(define (parse-tree-to-laml-expression tree) (cond ((inner-node? tree) (cond ((and (html-tree-node? tree) (= 1 (length (cdr tree)))) (parse-tree-to-laml-expression (root-of-parse-tree tree))) ((html-tree-node? tree) (map parse-tree-to-laml-expression (cdr tree))) (else (let ((root (root-of-parse-tree tree)) (subtrees (subtrees-of-parse-tree tree))) (cons (as-symbol (downcase-string (tag-of-tag-structure root))) (append (attributes-of-tag-structure root) (map parse-tree-to-laml-expression subtrees))))))) ((terminal-node? tree) (cond ((text-contents-entry? tree) (node-info tree)) ((start-end-node? tree) (cons (as-symbol (downcase-string (tag-of-tag-structure tree))) (attributes-of-tag-structure tree))) (else "") ; Problematic !!! )) (error "parse-tree-to-laml-expression: Should not happen"))) ;; Convert a HTML/XML parse tree to a LAML abstract syntax tree in language. ;; This function accept parse tree rooted by the symbols html-tree, xml-tree, as well the symbol tree. ;; Recall that the syntax trees are used as the internal format by ;; the validating mirrors of LAML. ;; .parameter pt The parse tree ;; .parameter language The name of the XML language, such as xhtml10-transitional. A symbol. ;; .internal-references "related function" "parse-tree-to-element-structure"Show source file in small font In xml-parser: Link from parse-tree-to-ast to it's cross reference table entry 
(define (parse-tree-to-ast pt language) (if (parse-tree? pt) (parse-tree-to-ast-1 pt language) (let ((the-tree (find-in-list (lambda (x) (tree-entry? x)) (subtrees-of-xml-html-parse-tree pt)))) (if the-tree (parse-tree-to-ast-1 the-tree language) (laml-error "parse-tree-to-ast: Cannot locate parse tree proper in the parse tree: " pt))))) ; The function doing the real work.Show source file in small font In xml-parser: Link from parse-tree-to-ast-1 to it's cross reference table entry 
(define (parse-tree-to-ast-1 pt language) (letrec ((make-ast ; restricted version. Precondition: contents is a list. (lambda (element-name contents attributes kind language) (list 'ast (as-string element-name) contents attributes (as-symbol kind) (as-symbol language) '()))) (make-xml-comment (lambda (str) (list 'xml-comment str))) (make-xml-processing-instruction (lambda (pi-target pi-string) (list 'processing-instruction pi-target pi-string))) ) (cond ((parse-tree? pt) (let* ((node (root-of-parse-tree pt)) (subtrees (subtrees-of-parse-tree pt))) (if (start-node? node) (make-ast (tag-of-tag-structure node) (map (lambda (st) (parse-tree-to-ast-1 st language)) subtrees) (attributes-of-tag-structure node) 'double language ) (laml-error "parse-tree-to-ast-1: Parse tree is expected to have a start-node as root: " pt)))) ((textual-content-node? pt) pt) ((start-end-node? pt) (make-ast (tag-of-tag-structure pt) '() (attributes-of-tag-structure pt) 'single language)) ((start-node? pt) (laml-error "parse-tree-to-ast-1: Start-end node encountered as direct child of tree node. Should not happen: " pt)) ((comment-node? pt) (make-xml-comment (second pt))) ((processing-instruction-node? pt) (make-xml-processing-instruction (second pt) (third pt))) ((or (declaration-node? pt) (xml-declaration-node? pt) "")) (else (laml-error "parse-tree-to-ast-1: Unknown kind of parse tree: " pt))))) ;; Convert a HTML/XML parse tree to an element structure ala LENO. ;; This function accept parse tree rooted by the symbols html-tree, xml-tree, as well the symbol tree. ;; Modelled after parse-tree-to-ast. ;; .internal-references "related function" "parse-tree-to-ast"Show source file in small font In xml-parser: Link from parse-tree-to-element-structure to it's cross reference table entry 
(define (parse-tree-to-element-structure pt) (if (parse-tree? pt) (parse-tree-to-element-structure-1 pt) (let ((the-tree (find-in-list (lambda (x) (tree-entry? x)) (subtrees-of-xml-html-parse-tree pt)))) (if the-tree (parse-tree-to-element-structure-1 the-tree) (laml-error "parse-tree-to-element-structure: Cannot locate parse tree proper in the parse tree: " pt))))) ; The function doing the real work.Show source file in small font In xml-parser: Link from parse-tree-to-element-structure-1 to it's cross reference table entry 
(define (parse-tree-to-element-structure-1 pt) (letrec ((make-element-structure (lambda (name contents attributes) (list 'element name contents attributes)))) ; COPIED FROM LENO (cond ((parse-tree? pt) (let* ((node (root-of-parse-tree pt)) (subtrees (subtrees-of-parse-tree pt))) (if (start-node? node) (make-element-structure (as-symbol (tag-of-tag-structure node)) (map parse-tree-to-element-structure-1 subtrees) (propertylist-to-alist (attributes-of-tag-structure node))) (laml-error "parse-tree-to-element-structure-1: Parse tree is expected to have a start-node as root: " pt)))) ((textual-content-node? pt) pt) ((start-end-node? pt) (make-element-structure (as-symbol (tag-of-tag-structure pt)) '() (propertylist-to-alist (attributes-of-tag-structure pt)))) ((start-node? pt) (laml-error "parse-tree-to-element-structure-1: Start-end node encountered as direct child of tree node. Should not happen: " pt)) ((or (declaration-node? pt) (xml-declaration-node? pt) (comment-node? pt) "")) (else (laml-error "parse-tree-to-element-structure-1: Unknown kind of parse tree: " pt)))))