(in-package "CL-USER") ;; $Id: //info.ravenbrook.com/project/mps/tool/xml-import/mmdoc-transforms.lisp#4 $ ;; ;; Transformations for the xml dump of mmdoc. ;; Each deftransform is a non-destructive transformation from one set ;; of content siblings to another. The final form returns a list - ;; possibly empty - of siblings (to replace the old one). All previous ;; forms must return non-nil, otherwise no transformation takes place. ;; The content siblings which might be transformed are those ;; explicitly named in the parameter list. The parameter list ;; specialises with defmethod syntax: the types are tag names combined ;; with and, or and not. (A string sibling is (not tag).) Within the ;; body of the deftransform the contents of each (tag) parameter are ;; bound to a variable of the form -contents. The variable tail is ;; bound to the list of any remaining siblings (so we can tell whether we're ;; at the end of the entire content of our parent). ;; Some of the rules are simple enough: if you see X throw it away; if ;; you see Y replace it with its contents; if you see Z rename it; if ;; you see P followed by Q then switch them. Others are simply horrid. (deftransform adjacent-text ((this (not tag)) (that (not tag))) `(,(concatenate 'simple-base-string this that))) (deftransform remove-noteinfo ((noteinfo noteinfo)) nil) (deftransform remove-run ((run run)) run-contents) (deftransform rename-par ((par par)) (let ((contents par-contents)) `(,(if contents (make-tag 'p (remove-if (lambda (x) (eq (car x) 'def)) (tag-attributes par)) contents) (make-tag 'br nil))))) (deftransform rename-break ((break break)) `(,(make-tag 'br nil break-contents))) (deftransform limit-breaks-before ((br br) (string (not tag))) (char= (schar string 0) #\Newline) `(,string)) (deftransform limit-breaks-after ((string (not tag)) (br br)) (char= (schar string (1- (length string))) #\Newline) `(,string)) (deftransform font-semantics ((font font) (more (not font))) `(,(make-tag 'font (tag-attributes font) (append font-contents (list more))))) (deftransform adjacent-fonts ((one font) (two font)) (equal (tag-attributes one) (tag-attributes two)) `(,(make-tag 'font (tag-attributes one) (append one-contents two-contents)))) (deftransform empty-font ((font font)) (and (null tail) (null font-contents)) nil) (deftransform parargraph-fonts ((parargraph p)) (and parargraph-contents (let ((previous-attr nil)) (every (lambda (x) (and (tag-p x) (eq (tag-name x) 'font) ;; disallow if adjacent-fonts could still act (not (let ((new-attr (tag-attributes x))) (equal new-attr (shiftf previous-attr new-attr)))))) parargraph-contents))) `(,(make-tag 'p (tag-attributes parargraph) ;; Spread contents of Monaco. Change Monospace to
, with 
               ;; embedded 
as newlines (or to if it's a single word). (loop for font in parargraph-contents append (let ((attributes (tag-attributes font))) (cond ((member attributes '(((size "9pt") (name "Monaco")) ((size "7pt") (color "red") (name "Mishawaka")) ) :test 'equal) (tag-contents font)) ((member attributes '(((size "9pt") (style "bold") (name "Monaco")) ((size "9pt") (color "red") (style "bold") (name "Monaco")) ) :test 'equal) `(,(make-tag 'strong () (tag-contents font)))) ((equal attributes '((size "9pt") (style "italic") (name "Monaco"))) `(,(make-tag 'cite () (tag-contents font)))) ((member attributes '(((size "9pt") (name "monospace")) ((name "monospace")) ((size "9pt") (name "Courier New")) ((name "Courier New"))) :test 'equal) (let ((pre-contents (tag-contents font))) (list (make-tag (if (and (= (length pre-contents) 1) (let ((thing (car pre-contents))) (and (stringp thing) (notany 'whitespace-char-p thing)))) 'code 'pre) nil `(,(with-output-to-string (str) (loop for c in pre-contents do (cond ((not (tag-p c)) (write-string c str)) ((eq (tag-name c) 'br) (format str "~&")) ;; regrettably several doclinks point to ;; nowhere (though not all) ((eq (tag-name c) 'doclink) nil) (t (error "Unexpected content in font: ~s." c)))))))))) (t (error "Unexpected content in parargraph: ~s." font)))))))) (deftransform boring-item ((item item)) (member-if (lambda (attribute) (and (eq (car attribute) 'name) (member (cadr attribute) '( "$Fonts" "$Links" "$Revisions" "$UpdatedBy" "CreationTime" "Creator" "DocUID" "InternalNotes" "ModifiedBy" "ModifiedTime" "Name" "NonRTFContent" "NonRTFInternalNotes" "NonRTFPurpose" "NonRTFTagging" "NonRTFText" "Procedures" "Revision" "RevisionList" "Rules" "StatusList" "Subject" "Tag" "TagTemplate" "Type" "UpdateLog" "UserList" "Categories" ;; may want to restore this ) :test 'string=))) (tag-attributes item)) nil) (deftransform extract-title ((item item)) (member '(name "Title") (tag-attributes item) :test 'equal) `(,(make-tag 'title nil (tag-contents (car item-contents))))) (deftransform remove-pardef ((pardef pardef)) nil) ;;

1. Introduction

(deftransform create-heading ((text (not tag))) (and (plusp (length text)) (every (lambda (c) (or (upper-case-p c) (whitespace-char-p c))) text)) `(,(make-tag 'h2 nil `(,(let ((anchor (format nil "section-~a" (substitute-if #\- 'whitespace-char-p (string-downcase text))))) (make-tag 'a `((id ,anchor) (name ,anchor)) `(,(string-capitalize text)))))))) (deftransform extract-heading ((parargraph p)) (find-if (lambda (x) (and (tag-p x) (eq (tag-name x) 'h2))) parargraph-contents) (let* ((heads nil) (pre-heads nil) (attributes (tag-attributes parargraph)) (tail (loop for tail on parargraph-contents do (let ((next (car tail))) (cond ((and (tag-p next) (eq (tag-name next) 'br)) nil) ; chuck early
((and (tag-p next) (eq (tag-name next) 'h2)) (push next heads)) (heads (return tail)) (t (push next pre-heads))))))) ; didn't find the head yet `(,@(when pre-heads `(,(make-tag 'p attributes (reverse pre-heads)))) ,@(reverse heads) ,(make-tag 'p attributes tail)))) (deftransform empty-paragraph ((parargraph p)) (null parargraph-contents) nil) (deftransform adjacent-code-parargraphs ((first p) (second p)) (every (lambda (para) (let ((contents (tag-contents para))) (and (= (length contents) 1) (let ((within (car contents))) (and (tag-p within) (member (tag-name within) '(pre code))))))) (list first second)) (equal (tag-attributes first) (tag-attributes second)) (let* ((first-within (car first-contents)) (second-within (car second-contents)) (first-inner-contents (tag-contents first-within)) (second-inner-contents (tag-contents second-within)) (new-content (if (and (eq (tag-name first-within) 'code) (eq (tag-name second-within) 'code)) (make-tag 'code nil (list (format nil "~a ~a" (car first-inner-contents) (car second-inner-contents)))) (make-tag 'pre nil ;; (append first-inner-contents second-inner-contents) (list (format nil "~a~{~&~a~}" (car first-inner-contents) (append (cdr first-inner-contents) second-inner-contents))))))) `(,(make-tag 'p (tag-attributes first) (list new-content))))) (deftransform br-before-parargraph ((br br) (parargraph p)) `(,parargraph)) (deftransform br-after-parargraph ((parargraph p) (br br)) `(,parargraph)) ;; order by name attribute (deftransform reorder-for-change-history ((a item) (b item) (c item) (d item)) (let ((attributes (mapcar (lambda (item) (cadr (assoc 'name (tag-attributes item)))) (list a b c d)))) (and (every (lambda (attribute) (find attribute attributes :test 'string=)) '("ChangeLog" "DateList" "ISOList" "ReasonList")) (or (string> (car attributes) (cadr attributes)) (string> (cadr attributes) (caddr attributes)) (string> (caddr attributes) (cadddr attributes))))) (sort (list a b c d) 'string< :key (lambda (item) (cadr (assoc 'name (tag-attributes item)))))) (defun convert-author (author) (let ((in-house (cdr (assoc author '(("gavinm" . "grm") ("ndl" . "ndl") ("nick" . "ndl") ("nickb" . "nb") ("richard" . "rb")) :test 'string=)))) (if in-house ;; GDR (make-tag 'a `((href ,(format nil "mailto:~a@ravenbrook.com" in-house))) `(,(string-upcase in-house))) author))) (defparameter *todays-change* (make-tag 'tr '((valign "top")) `(,(make-tag 'td nil `(,(multiple-value-bind (secs mins hours days months years) (get-decoded-time) (declare (ignore secs mins hours)) (format nil "~d-~2,'0d-~2,'0d" years months days)))) ,(make-tag 'td nil `(,(convert-author "ndl"))) ,(make-tag 'td nil '("Converted from xml to html (second pass)."))))) ;; All the information is present in one of these alone (the ChangeLog) but it has to ;; be parsed and the formatting is not brilliant. Use ChangeLog for author's name only. ;; Each contains one textlist containing n texts containing the text I seek (deftransform change-history ((changelog item) (dates item) (iso-dates item) (reasons item)) (flet ((test-name (item name) (member `(name ,name) (tag-attributes item) :test 'equal))) (and (test-name reasons "ReasonList") (test-name dates "DateList") (test-name iso-dates "ISOList") (test-name changelog "ChangeLog"))) (flet ((safe-contents (contents) (let ((content (car contents))) (if (member (tag-name content) '(textlist datetimelist)) (tag-contents content) (list content))))) `(,(make-tag 'ravenbrook-history nil ;; For no expalined reason, when the ChangeLog entry spans more than one line ;; Notes just bungs in extra . The ReasonList entry seems OK though (note that ;; it may contain punctuation where the ChangeLog has a line break). Grand. ;; Strategy: I attempt to find the ISO-date in this ChangeLog entry. If it's not ;; there, I pop the list and cross fingers. If it's nowhere (and this did happen) ;; then we have a missing ChangeLog entry but the rest are ok. Alas. (reverse (cons *todays-change* (loop for reason in (safe-contents reasons-contents) as date in (safe-contents dates-contents) as iso-date in (safe-contents iso-dates-contents) with change-lines = (safe-contents changelog-contents) collect (let* ((reason (car (tag-contents reason))) (date (car (tag-contents date))) (iso-date-string (car (tag-contents iso-date))) (iso-date-length (length iso-date-string)) (change-line (when-let (start-change-lines change-lines) (loop (let ((candidate (car (tag-contents (pop change-lines))))) (when (and (<= iso-date-length (length candidate)) (string= iso-date-string candidate :end2 iso-date-length)) (return candidate)) (unless change-lines (setf change-lines start-change-lines) (return nil)))))) (author-start (when change-line (position-if 'alpha-char-p change-line :start (length iso-date-string)))) (author-end (when change-line (position-if-not 'alpha-char-p change-line :start author-start))) (author (when change-line (convert-author (subseq change-line author-start author-end)))) ;; 20000601T143423,09+01 (pretty-date (format nil "~a-~a-~a ~a:~a:~a ~a" (subseq date 0 4) (subseq date 4 6) (subseq date 6 8) (subseq date 9 11) (subseq date 11 13) (subseq date 13 15) (subseq date 18)))) (make-tag 'tr '((valign "top")) `(,(make-tag 'td nil `(,pretty-date)) ,(make-tag 'td nil (when author `(,author))) ,(make-tag 'td nil `(,(or reason ""))))))))))))) (deftransform address ((created-by item) (created-time item)) (flet ((test-name (item name) (member `(name ,name) (tag-attributes item) :test 'equal))) (and (test-name created-by "CreatedBy") (test-name created-time "CreatedTime"))) (let* ((author (convert-author (car (tag-contents (car created-by-contents))))) (full-date (car (tag-contents (car created-time-contents)))) (short-date (format nil "~a-~a-~a" (subseq full-date 0 4) (subseq full-date 4 6) (subseq full-date 6 8)))) `(,(make-tag 'address () `(,(format nil "~a, ~a" author short-date)))))) ;; draft (deftransform status ((status item)) (equal (tag-attributes status) '((name "Status"))) `(,(make-tag 'status nil (tag-contents (car status-contents))))) (defparameter *copyright-notice* "Copyright © 2001 Ravenbrook Limited. This document is provided \"as is\", without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this document. You may not duplicate or reproduce this document in any form without the express permission of the copyright holder.") (deftransform html ((title title) (text item) (address address) (status status) (history ravenbrook-history)) (member '(name "Text") (tag-attributes text) :test 'equal) (let* ((richtext (car text-contents)) (body-contents (tag-contents richtext))) `(,(make-tag 'html '((xmlns "http://www.w3.org/1999/xhtml") (|xml:lang| "en") (lang "en")) `(,(make-tag 'head () `(,title)) ,(make-tag 'body '((bgcolor "#FFFFFF") (text "#000000") (link "#000099") (vlink "#660066") (alink "#FF0000")) `(,(make-tag 'div '((align "center")) `(,(make-tag 'h1 nil title-contents) ,(make-tag 'p nil status-contents) ,address)) ,@body-contents ,(make-tag 'h2 nil `(,(make-tag 'a '((id "section-A") (name "section-A")) '("A. References")))) ,(make-tag 'h2 nil `(,(make-tag 'a '((id "section-B") (name "section-B")) '("B. Document History")))) ,(make-tag 'table nil history-contents) ,(make-tag 'hr nil nil) ,(make-tag 'p nil `(,(make-tag 'small nil `(,*copyright-notice*)))) ,(make-tag 'div '((align "center")) `(,(make-tag 'p nil ;; escape this $Id sting from Perforce `(,(make-tag 'code nil '(#.(format nil "$~a: $" "Id")))))))))))))) (deftransform html-postponed ((title title) (text item) (address address) (history ravenbrook-history) (status status)) (list title text address status history)) (deftransform html-postponed-again ((title title) (text item) (status status) (address address) (history ravenbrook-history)) (list title text address status history)) (deftransform purpose-content-tagging ((purpose item) (content item) (tagging item)) (flet ((test-name (item name) (member `(name ,name) (tag-attributes item) :test 'equal))) (and (test-name purpose "Purpose") (test-name content "Content") (test-name tagging "Tagging"))) `(,(make-tag 'item '((name "Text")) `(,(make-tag 'richtext () (loop for item in (list purpose content tagging) as heading in '("Purpose" "Content" "Tagging") collect (make-tag 'h2 () `(,heading)) append (tag-contents (car (tag-contents item))))))))) ;; regrettably several doclinks point to nowhere (though not all) (deftransform remove-doclink ((doclink doclink)) nil)