highlight!
#!/usr/local/bin/gosh
;; -*- encoding: utf-8 -*-
;;;; r6rs-shtml-markup.scm - A naive permissive R6RS Scheme markupper
;;;
;;; Copyright (c) 2008 OOHASHI, Daichi <leque@katch.ne.jp>,
;;; All rights reserved.
;;;
;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:
;;;
;;; 1. Redistributions of source code must retain the above copyright
;;;    notice, this list of conditions and the following disclaimer.
;;;
;;; 2. Redistributions in binary form must reproduce the above copyright
;;;    notice, this list of conditions and the following disclaimer in the
;;;    documentation and/or other materials provided with the distribution.
;;;
;;; 3. Neither the name of the authors nor the names of its contributors
;;;    may be used to endorse or promote products derived from this
;;;    software without specific prior written permission.
;;;
;;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;;; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;;; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;;; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;;; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;;; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
;;; TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;

;; should be syntax-case, internal definition
(define-macro (define-tags . ts)
  `(begin
     ,@(map (lambda (t)
	      (let ((s (symbol->string t)))
		`(define ,(string->symbol (string-append "tag:" s))
		   (%tag ,s))))
	    ts)))

;; Read R6RS Scheme source code from port, and SHTML-markup it.
;; Markups are shown below:
;;   pre.scheme		: top level
;;   .scirpt-header	: shebang line
;;   .abbrev		: ', `, etc.
;;   .atomosphere	: inter lexme whitespaces
;;   .boolean		: #t and #f
;;   .bytevector	: bytevectors
;;   .character		: characters
;;   .comment		: line, block, datum, and #!<identifier> comment
;;   .dot		: `.' character
;;   .identifier	: identifiers
;;   .list		: lists
;;   .number		: numbers
;;   .string		: strings
;;   .vector		: vectors
;;   .unknown		: the others
(define (r6rs->shtml port)
  (define (complement f)
    (lambda args (not (apply f args))))
  (define (reverse-list->string cs)
    (list->string (reverse cs)))
  (define (whitespace? c)
    (memv c '(#\x0009 #\x000a #\x000b #\x000c #\x000d
	      #\x0020 #\x0085 #\x0085 #\x00a0 #\x1680 #\x180e
	      #\x2000 #\x2001 #\x2002 #\x2003 #\x2004 #\x2005
	      #\x2006 #\x2007 #\x2008 #\x2009 #\x200a
	      #\x2028 #\x2029 #\x202f #\x205f #\x3000)))
  (define (delimiter? c)
    (or (eof-object? c)
	(whitespace? c)
	(memv c '(#\( #\) #\[ #\] #\" #\; #\#))))
  (define (comment-end? c)
    (or (eqv? c #\x2029)		; paragraph separator
	(eqv? c #\newline)))
  (define (peek) (peek-char port))
  (define (getc) (read-char port))
  (define (tag cls . cont)
    `(span (@ (class ,cls)) ,@cont))
  (define (tag-class t) (cadr (cadadr t)))
  (define (%tag cls)
    (lambda cont (apply tag cls cont)))
  #|
  (define-syntax define-tags
    (lambda (x)
      (syntax-case x ()
	((k t ...)
	 (with-syntax (((s ...) (map (lambda (i)
				       (symbol->string (syntax->datum i)))
				     #'(t ...))))
	   (with-syntax (((n ...)
			  (map (lambda (s)
				 (datum->syntax #'k
				   (string->symbol
				    (string-append "tag:" (syntax->datum s)))))
			       #'(s ...))))
	     #`(begin
		 (define n (%tag s))
		 ...)))))))
  |#
  (define-tags
    script-header
    abbrev atomosphere boolean bytevector character comment
    dot identifier list number string vector unknown)
  (define (read-until pred? ctx)
    (let loop ((rs ctx))
      (let ((c (peek)))
	(if (or (eof-object? c) (pred? c))
	    (reverse-list->string rs)
	    (loop (cons (getc) rs))))))
  (define (read-block-comment level ctx)
    (if (zero? level)
	(reverse-list->string ctx)
	(let ((c (getc)))
	  (cond
	   ((eof-object? c)
	    (read-block-comment 0 ctx))
	   ((and (eqv? c #\|) (eqv? (peek) #\#))
	    (read-block-comment (- level 1) (cons (getc) (cons c ctx))))
	   ((and (eqv? c #\#) (eqv? (peek) #\|))
	    (read-block-comment (+ level 1) (cons (getc) (cons c ctx))))
	   (else
	    (read-block-comment level (cons c ctx)))))))
  (define (read-string ctx)
    (let loop ((rs ctx))
      (let ((c (peek)))
	(cond
	 ((eof-object? c)
	  (reverse-list->string rs))
	 ((eqv? c #\")
	  (reverse-list->string (cons (getc) rs)))
	 ((eqv? c #\\)
	  (let ((c (getc)))
	    (if (eof-object? (peek))
		(loop (cons c rs))
		(loop (cons (getc) (cons c rs))))))
	 (else
	  (loop (cons (getc) rs)))))))
  (define (close-paren? op ch)
    (eqv? ch
	  (cond ((assv op '((#\( . #\))
			    (#\[ . #\])))
		 => cdr)
		(else #f))))
  (define (read-list op ctx)
    (let loop ((rs ctx))
      (let ((c (peek)))
	(cond ((eof-object? c)
	       (reverse rs))
	      ((close-paren? op c)
	       (reverse (cons (string (getc)) rs)))
	      (else
	       (loop (cons (r6rs-read #f) rs)))))))
  (define (digit? c)
    (memv c '(#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)))
  (define (hexdigit? c)
    (or (digit? c)
	(memv c '(#\a #\b #\c #\d #\e #\f
		  #\A #\B #\C #\D #\E #\F))))
  (define (read-identifier ctx)
    (define (loop rs)
      (let ((c (peek)))
	(cond ((or (eof-object? c) (delimiter? c))
	       (reverse-list->string rs))
	      ((eqv? c #\\)
	       (read-hex-escape (cons (getc) rs)))
	      (else
	       (loop (cons (getc) rs))))))
    (define (read-hex-escape ctx)
      (if (eqv? (peek) #\x)
	  (let recur ((rs (cons (getc) ctx)))
	    (let ((d (peek)))
	      (cond
	       ((hexdigit? d)
		(recur (cons (getc) rs)))
	       ((eqv? d #\;)
		(loop (cons (getc) rs)))
	       (else
		(loop rs)))))
	  (loop (cons c rs))))
    (if (and (pair? ctx) (eqv? (car ctx) #\\))
	(read-hex-escape ctx)
	(loop ctx)))
  (define (r6rs-read ctx)
    (let ((c (or ctx (getc))))
      (case c
	((#\;)
	 (tag:comment (read-until comment-end? (list c))))
	((#\")
	 (tag:string (read-string (list c))))
	((#\+ #\-)
	 (let ((d (peek)))
	   (if (delimiter? d)
	       (tag:identifier (string c))
	       (let ((s (read-until delimiter? (list c))))
		 ((cond ((and (eqv? c #\-) (eqv? d #\>)) tag:identifier)
			((string->number s) tag:number)
			(else tag:unknown))
		  s)))))
	((#\( #\[)
	 (apply tag:list (read-list c (list (string c)))))
	((#\.)
	 (cond
	  ((delimiter? (peek))
	   (tag:dot (string c)))
	  ((eqv? (peek) #\.)
	   (let ((d (getc)))
	     (if (eqv? (peek) #\.)
		 (let ((e (getc)))
		   (if (delimiter? (peek))
		       (tag:identifier (string c d e))
		       (tag:unknown (read-until delimiter? (list e d c)))))
		 (tag:unknown (read-until delimiter? (list d c))))))
	  (else
	   (tag:unknown (read-until delimiter? (list c))))))
	((#\' #\`)
	 (tag:abbrev (string c)))
	((#\,)
	 (tag:abbrev
	  (if (eqv? (peek) #\@)
	      (string c (getc))
	      (string c))))
	((#\#)
	 (let ((d (getc)))
	   (case d
	     ((#\t #\T #\f #\F)
	      (if (delimiter? (peek))
		  (tag:boolean (string c d))
		  (tag:unknown (read-until delimiter? (list d c)))))
	     ((#\\)
	      (if (eof-object? (peek))
		  (tag:unknown (string c d))
		  (tag:character (read-until delimiter? (list (getc) d c)))))
	     ((#\i #\I #\e #\E #\b #\B #\d #\D #\x #\X)
	      (let ((s (read-until delimiter?
				   (if (eqv? (peek) #\#)
				       (list (getc) d c)
				       (list d c)))))
		((if (string->number s) tag:number tag:unknown) s)))
	     ((#\!)
	      (tag:comment "#!" (read-identifier '())))
	     ((#\;)
	      (let loop ((x (r6rs-read #f))
			 (rs '()))
		(if (not (member (tag-class x)
				 '("abbrev" "atomosphere")))
		    (apply tag:comment "#;" (reverse (cons x rs)))
		    (loop (r6rs-read #f) (cons x rs)))))
	     ((#\|)
	      (tag:comment (read-block-comment 1 (list d c))))
	     ((#\' #\`)
	      (tag:abbrev (string c d)))
	     ((#\,)
	      (tag:abbrev (if (eqv? (peek) #\@)
			      (string c d (getc))
			      (string c d))))
	     ((#\()
	      (apply tag:vector (read-list d (list (string c d)))))
	     ((#\v)
	      (if (eqv? (peek) #\u)
		  (let ((e (getc)))
		    (if (eqv? (peek) #\8)
			(let ((f (getc)))
			  (if (eqv? (peek) #\()
			      (let ((g (getc)))
				(apply
				 tag:bytevector
				 (read-list g (list (string c d e f g)))))
			      (tag:unknown
			       (read-until delimiter? (list f e d c)))))
			(tag:unknown (read-until delimiter? (list e d c)))))
		  (tag:unknown (read-until delimiter? (list d c)))))
	     (else
	      (if (eof-object? d)
		  (tag:unknown (string c))
		  (tag:unknown (read-until delimiter? (list d c))))))))
	(else
	 (cond
	  ((eof-object? c) c)
	  ((whitespace? c)
	   (tag:atomosphere (read-until (complement whitespace?) (list c))))
	  ((digit? c)
	   (let ((s (read-until delimiter? (list c))))
	     ((if (string->number s) tag:number tag:unknown) s)))
	  (else
	   (tag:identifier (read-identifier (list c)))))))))
  (define (r6rs-read-all ctx)
    (let loop ((c ctx)
	       (rs '()))
      (let ((e (r6rs-read c)))
	(if (eof-object? e)
	    (reverse rs)
	    (loop #f (cons e rs))))))
  (if (eqv? (peek) #\#)
      (let ((c (getc)))
	(if (eqv? (peek) #\!)
	    (let ((d (getc)))
	      (let loop ((ch (getc))
			 (rs (list d c)))
		(cond
		 ((eof-object? ch)
		  `(pre (@ (class "scheme"))
			,(tag:script-header (reverse-list->string rs))))
		 ((eqv? ch #\newline)
		  `(pre (@ (class "scheme"))
			,(tag:script-header (reverse-list->string (cons ch rs)))
			,@(r6rs-read-all #f)))
		 (else
		  (loop (getc) (cons ch rs))))))
	    `(pre (@ (class "scheme"))
		  ,@(r6rs-read-all c))))
      `(pre (@ (class "scheme"))
	    ,@(r6rs-read-all #f))))

(use sxml.serializer)

(define (main args)
  (srl:parameterizable
   `(html
     (head
      (title ,(cadr args))
      (script (@ (type "text/javascript")
		 (src "./highlight.js"))
	      " ")
      (link (@ (rel "stylesheet")
	       (type "text/css")
	       (href "./style.css"))))
     (body
      (div (span (@ (onclick "highlighter.highlight()"))
		 "highlight!"))
      ,(call-with-input-file (cadr args) r6rs->shtml)))
   (current-output-port)
   '(omit-xml-declaration? . #f)
   '(indent . #f)
   )
  0)