#!/bin/sh
: ; exec klone $0 "$@"
; The above line allows not to embed the exact path of the klone executable

;;Skeleton of a typical klone script

(setq args (getopts "USAGE: %0 [options] [wwwboard-dir]
reads all files in a WWWboard directory (pwd by default) and outputs a text
file, if no other output option is given
"
    ("-web" dir webdir "creates a web site in dir. 
reads config file there in file .config")
    ("-url" url weburl "gives external URL for the web site")
    ("-mbox" file mboxfile "prints to file in mbox format")
    ("-v" () verbose "verbose operation")
;; --- Hidden Options ---
    ("-debug" () enter-debugger-on-error "enter klone debugger on error"
    :hidden t)
    ("-stackdump" () stackdump-on-error "verbose stack dump on error"
    :hidden t)
))

(if enter-debugger-on-error (kdb t))
(if stackdump-on-error (stack-dump-on-error t))

;;=============================================================================
;;                    Data
;;=============================================================================
(defstruct Message
  id					;number (string)
  subject				;title
  email					;email of poster, can be ()
  from					;name of poster
  date					;date of post: numlist: Y M D H M S
  in-reply-to				;id
  followups				;list of ids
  body					;text of message
  link					;optional URL, can be ()
  linkname				;its descriptive text, can be ()
  image					;optional image URL, can be ()
)  

id2mes = (Hashtable ())

;;=============================================================================
;;                    Code
;;=============================================================================
(defun main (&aux 
    messages
  )
  messages = (read-w3b-dir (if args (0 args) "."))
  ;;(verify-html-body messages)
  (if mboxfile (print-mboxfile messages mboxfile))
  (if (not (or webdir mboxfile))
    (print-text messages)
  )
)


(defun read-w3b-dir (dir &aux
    (re-filename (regcomp "^([0-9]+)[.]html$"))
    (messages (vector))
    mes
  )
  (dolist (file (directory dir))
    (if (re-filename file) (progn
	(verbose? "reading message file %0" file)
	(with (mes (read-w3b-file (+ dir "/" file) (re-filename 1)))
	  (put id2mes mes.id mes)
	  (lappend messages mes))
  )))
  messages
)

(dohash (v s [
      re-title "<title>([^<]*)</title>"
      re-from "<b>Posted by: </b><a href=\"mailto:([^\"]*)\">([^<]*)</a> on ([^<]*)<p>"
      re-from2 "<b>Posted by: </b>(.*) on ([A-Z][a-z][^<]*)<p>"
      re-reply "<b>In Reply to: </b><a href=\"([0-9]+)[.]html\">"
      re-br "^<br>$"
      re-mess-end "^<br><hr size=7 width=75%><p>$"
      re-followups "^<a name="followups"><b>Follow Ups:</b></a><br>$"
      re-followup "<a href=\"([0-9]+)[.]html\">"
      re-end-followups "^<br><hr><p>$"
      re-link "^<ul><li><a href=\"([^\"]+)\">([^<]*)</a></ul>$"
      re-link2 "^<ul><li><a href=\"([^\"]+)$"
      re-link3 "^\">([^<]*)</a></ul>$"
      re-image "^<center><img src=\"([^\"]+)\"></center><p>$"
      re-date "([A-Z][a-z]+) +([0-9]+), +([0-9]+) at ([0-9][0-9]):([0-9][0-9]):([0-9][0-9])"
      re-tag-p "<p>"
      re-tag-br "<br>"
      re-empty-space-start "^([ \t\r]\n)+"
      re-empty-space-end "([ \t\r\n])+$"
  ])
  (set v (regcomp s))
)

(setq monthnames ["???" "January" "February" "March" "April" "May" "June"
    "July" "August" "September" "October" "November" "December"]
)

(defun read-w3b-file (filename id &aux
    (fd (open filename))
    line
    (mes (Message))
  )
  mes.id = id
  (skip-lines 2)
  (read-re re-title) mes.subject = (re-title 1)
  (skip-lines 8)
  line = (read-line fd)
  (if (re-from line) (progn		;email given
      mes.email = (re-from 1)
      mes.from = (re-from 2)
      mes.date = (re-from 3)
    )
    (re-from2 line) (progn		;no email given
      mes.from = (re-from2 1)
      mes.date = (re-from2 2)
    )
  )
  (replace-string mes.date {regcomp "19100"} "2000" :all t :quote t) ;Y2K bug
  (if (re-date mes.date) 
    (with (month (seek monthnames (re-date 1)))
      (if (not month) 
	(error "Parse error WWWBoard file: %0\n    bad date: %1" mes.date)
      )
      mes.date = (list
	  (Int (re-date 3)) month (Int (re-date 2))
	  (Int (re-date 4))(Int (re-date 5))(Int (re-date 6))
    ))
    (error "Parse error WWWBoard file: %0\n    bad date: %1" mes.date)
  )
  line = (read-line fd)
  (if (re-reply line) (progn		;this is a reply to
      mes.in-reply-to = (re-reply 1)
      line = (read-line fd)
  ))
  (if (re-image line) (progn		;there is an embedded image
      mes.image = (re-image 1)
      line = (read-line fd)
  ))
  mes.body = line			;body fits on a line
  (replace-string line re-tag-br "\n" :all t :quote t) ;trim html tags
  (replace-string line re-tag-p "\n\n" :all t :quote t)
  (replace-string line re-empty-space-start "" :all t :quote t)
  (replace-string line re-empty-space-end "" :all t :quote t)
  (read-re re-br) 
  line = (read-line fd ())
  (if (re-link line) (progn		;embedded URL
      mes.link = (re-link 1)
      mes.linkname = (re-link 2)
      (read-re re-mess-end)	
    )
    (re-link2 line) (progn		;in case of \n at end of URL
      (read-re re-link3)
      mes.link = (re-link2 1)
      mes.linkname = (re-link3 1)
      (read-re re-mess-end)
    )
    ;; else we read re-mess-end, gobbled
    (re-mess-end line) ()
    t (error "Parse error WWWBoard file: %0\n    re: %1\n  line: %2" 
      filename re-mess-end line
    )
  )
  (read-re re-followups)
  mes.followups = (vector)
  line = (read-line fd ())
  (while (and line (not (re-end-followups line)))
    (if (re-followup line)
      (lappend mes.followups (re-followup 1))
    )
    line = (read-line fd ())
  )
  mes
)

(defun skip-lines (n &aux line) 
  (dotimes n line = (read-line fd ()))
  (if (not line) (error "Truncated WWWBoard file: %0" filename))
)

(defun read-re (re &optional ignore-err? &aux (line (read-line fd ())))
  ;; global: fd filename
  (if (not line) ; EOF
    (error "Truncated WWWBoard file: %0" filename)
    (re line)
    line
    (if ignore-err? ()
      (error "Parse error WWWBoard file: %0\n    re: %1\n  line: %2" 
	filename re line
    ))
  )
)

;; misc checks

(defun verify-html-body (messages &aux
    (re-html (regcomp "<(p|br)>"))
  )
   (dolist (mes messages)
     (doregexp (re "<[^>]*>" mes.body)
       (if (not (re-html (re 0)))
 	(PF "*** In message %0: html tag %1\n" mes.id (re 0))
))))

;;=============================================================================
;;                    Text output
;;=============================================================================
;; a mail-style format. ^L### separates mails
(defun print-text (messages &aux)
  (dolist (mes messages)
    (PF "%0 %1\n" (make-string 72 #\#) mes.id)
    (print-message mes)
  )
)

;; Individual messages
(defun print-message (message &optional (out  *standard-output*))
  (PF out "Subject: %0\n" mes.subject)
  (PF out "From: %0\n" mes.from)
  (if mes.email (PF out "Email: %0\n" mes.email))
  (with (d mes.date)
    (PF out "Date: %0-%1-%2 %3:%4:%5\n" d.0 (expand-num d.1 2) 
      (expand-num d.2 2) (expand-num d.3 2) (expand-num d.4 2)
      (expand-num d.5 2)
  ))
  (PF out "Id: %0\n" mes.id)
  (if mes.in-reply-to (PF out "In-Reply-To: %0\n" mes.in-reply-to))
  (when mes.followups 
    (PF out "Followups:")
    (dolist (f mes.followups) (PF out " %0" f))
    (PF out "\n")
  )
  (if mes.link 
    (PF out "Link-URL: %0\nLink-Name: %1\n" mes.link mes.linkname))
  (if mes.image (PF out "Image: %0\n" mes.image))
  (PF out "\n%0\n\n" mes.body)
)

(defun print-mboxfile (messages outname &aux
    (fd (open outname :direction :output :if-exists :supersede :error ()))
  )
  (if (not fd) (die 1 "Cannot create file %0\n" outname))
  (dolist (mes messages) (print-mboxentry mes fd))
)

print-mboxentry-re = (regcomp "\nFrom ")
print-mboxentry-re2 = (regcomp "^From ")

(defun print-mboxentry (mes out &aux
    (sender (if mes.email (PF String "%0 <%1>" mes.from mes.email)
	(PF String "%0" mes.from)
    ))
    (d (time-to-date (date-to-time mes.date)))
    sep body
  )
  (PF out "From %0 %1 %2 %3 %4:%5:%6 %7\n" mes.from
    (get '[Sun Mon Tue Wed Thu Fri Sat] d.6 "Mon") 
    (get '[Jan Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec] d.1 'Jan)
    d.2 d.3 d.4 d.5 d.0
  )
  (PF out "From: %0\n" sender)
  (PF out "Subject: %0\n" mes.subject)
  (PF out "Date: %0, %1 %2 %3 %4:%5:%6 +0000\n"
    (get '[Sun Mon Tue Wed Thu Fri Sat] d.6 "Mon") d.2 
    (get '[Jan Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec] d.1 'Jan)
    d.0 d.3 d.4 d.5
  )
  (PF out "Message-Id: <%0>\n" mes.id)
  (if mes.in-reply-to (PF out "In-Reply-To: <%0>\n" mes.in-reply-to))
;;  (when mes.followups 
;;    (PF out "X-Followups:")
;;    sep = ()
;;    (dolist (f mes.followups) 
;;      (if sep (PF out ",") sep = t)
;;      (PF out " <%0>" f)
;;    )
;;    (PF out "\n")
;;  )
  (if mes.link 
    (PF out "X-Link-URL: %0\nX-Link-Name: %1\n" mes.link mes.linkname))
  (if mes.image (PF out "X-Image: %0\n" mes.image))
  body = mes.body
  (replace-string  body print-mboxentry-re "\n From " :all t :quote t)
  (replace-string  body print-mboxentry-re2 " From " :quote t)
  (PF out "\n%0\n\n" mes.body)
)

(main)

;;; EMACS MODES
;;; Local Variables: ***
;;; mode:lisp ***
;;; End: ***

