#! /bin/sh
exec ${GUILE-guile} -e "(ttn-do sizzweb)" -s $0 "$@" # -*-scheme-*-
!#
;;; sizzweb --- refinement of Martin Grabmueller's web server

;; Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Thien-Thi Nguyen
;; Copyright (C) 2000, 2001 Martin Grabmueller <mgrabmue@cs.tu-berlin.de>
;;
;; This file is part of ttn-do, released under the terms of the
;; GNU General Public License as published by the Free Software
;; Foundation; either version 3, or (at your option) any later
;; version.  There is NO WARRANTY.  See file COPYING for details.

;;; Description: Simple web server.

;;; Commentary:

;; Usage: sizzweb -r DIR [options...]
;;
;;   -p, --port=PORT     -- Listen on PORT [8001]; to specify a UNIX
;;                          domain socket, use "-p unix:FILENAME"
;;   -r, --docroot DIR   -- Specify DIR as filesystem root (required)
;;   -u, --ulibdir DIR   -- Look in DIR for servlets.scm [$HOME/.sizzweb.d]
;;   -d, --daemon FILE   -- Write pid and tcp port to FILE, go into background
;;   -l, --logfile FILE  -- Append logs to FILE [stdout if not --daemon]
;;       --funky         -- Don't use this unless you have read the source
;;   -b, --boredom N     -- Hang up after N inactive seconds (minimum 15)
;;
;; To stop sizzweb when invoked with -d FILE, use: kill -1 `head -1 FILE`
;; (note the backticks).  To reload servlets.scm, use kill -2.

;;; Code:

(define-module (ttn-do sizzweb)
  #:export (main)
  #:use-module ((ttn-do zzz banalities) #:select (check-hv
                                                  qop<-args))
  #:use-module ((ice-9 regex) #:select (string-match
                                        regexp-quote
                                        match:suffix
                                        match:prefix))
  #:use-module ((www utcsec) #:prefix UTCS: #:select (rfc1123-date<-
                                                      <-mtime
                                                      rfc1123-now))
  #:use-module ((www server-utils big-dishing-loop)
                #:select (make-big-dishing-loop))
  #:use-module ((www server-utils filesystem)
                #:select (access-forbidden?-proc
                          upath->filename-proc
                          filename->content-type))
  #:use-module ((www server-utils log) #:select (log-http-response-proc))
  #:use-module ((www data http-status) #:select (http-status-string))
  #:use-module ((www data mime-types) #:select (reset-mime-types!
                                                put-mime-types-from-file!))
  #:use-module ((ttn-do zz sys linux-gnu) #:select (sendfile/never-fewer))
  #:use-module ((ttn-do zzz xhtml-tree) #:select (~simple-strict-xhtml
                                                  ~head ~title ~body
                                                  ~h2 ~hr
                                                  ~a ~br ~b
                                                  ~table ~tr ~td))
  #:use-module ((ttn-do zzz personally) #:select (accumulator
                                                  fs fso fse make-fso))
  #:use-module ((ttn-do zzz senz-altro) #:select (daemonize))
  #:use-module ((ttn-do zzz filesystem) #:select (filtered-files-in-vicinity
                                                  not-dot-not-dotdot)))

(define *sizzweb-version*
  ;; 1.0 -- initial release
  ;; 1.1 -- `Date' response header bugfix
  ;;        can reload servlets.scm w/ `kill -2'
  ;;        handles *.gz for Content-Encoding
  ;;        prettier internal filesystem directory display
  ;;        dynamic handlers updated in place
  ;;        new proc: remove-dynamic-handler!
  ;; 1.2 -- single-process operation unless given option --funky
  ;;        can override `send-generated-directory-index'
  ;;        can die of boredom
  ;; 1.3 -- finish cleanly on SIGQUIT and SIGTERM
  ;; 1.4 -- do "zero-copy TCP" for file transfers
  ;; 1.5 -- bugfix: handle SIGTERM when daemon
  ;;        log port accessible to servlets
  ;; 1.6 -- transmit-file includes Last-Modified header
  ;; 1.7 -- when --daemon, close stdin, stdout, stderr
  ;; 1.8 -- bugfix: shutdown socket after sending error message
  ;;        loop on `shutdown 2' check
  ;;        log restart/shutdown
  ;; 1.9 -- output UTF-8 XHTML
  ;;        support UNIX-domain sockets
  ;;        handle HTTP HEAD request
  ;; 1.10 - use SO_LINGER
  ;;        do `shutdown 2' without check/loop
  ;; 1.11 - ignore ENOTCONN on socket `shutdown'
  ;;        manage mime-types db directly
  ;;        use "/" in Server header http response
  ;; 1.12 - bugfix: unbotch `cond' to `or' conversion (ugh)
  "1.12")
(define *server-name* (string-append "SizzWeb " *sizzweb-version*))
(define *server-name-with-slash* (let* ((s (string-copy *server-name*))
                                        (sp (string-index s #\space)))
                                   (string-set! s sp #\/)
                                   s))

(define *system-mime-types* "/etc/mime.types")

(define fs-name #f)
(define load-servlets! #f)
(define *log-port* #f)

;; Support

(define RX:suffix:gz (make-regexp "[.]gz$"))

(define (name->type fn)
  (filename->content-type fn "text/plain"))

(define (~td/right x)
  (~td 'align "right" x))

(define (~page/titled title . x)
  (~simple-strict-xhtml
   (~head (~title title))
   (~body x (~hr) *server-name*)))

;; Standard responses ================================================

(define (add-standard-headers M)
  (M #:add-header #:Date (UTCS:rfc1123-now))
  (M #:add-header #:Server *server-name-with-slash*))

(define (send-error M number . body)
  (let ((msg (http-status-string number))
        (nstr (number->string number)))
    (M #:set-reply-status number msg)
    (add-standard-headers M)
    (M #:add-header #:Connection "close")
    (M #:add-header #:Content-Type "text/html")
    (M #:add-content (let ((title (list nstr " " msg)))
                       (~page/titled title (~h2 title) body)))
    (M #:rechunk-content #t)
    (M #:send-reply 2)))

(define (send-not-found M upath)
  (send-error M 404
              "The requested URL:" (~br)
              (~b upath) (~br)
              "was not found on this server."))

(define (send-bad-request M)
  (send-error M 400
              "Your browser sent a request that"
              " this server could not understand."))

(define (send-unknown-method M method upath)
  (send-error M 501
              (~b (symbol->string method))
              " to "
              (~b upath)
              " not supported."))

(define (send-forbidden M upath)
  (send-error M 403
              "You do not have permission to access:" (~br)
              (~b upath)))

(define (send-moved-permanently M upath)
  (send-error 301
              "The document has moved to a "
              (~a 'href upath "new location") "."))

;; Special responses =================================================

(define send-generated-directory-index-box
  (let ((actual #f))
    (lambda args
      (if (null? args)
          actual
          (set! actual (car args))))))

;; Construct and send a directory index.
;;
(define (send-generated-directory-index M upath dir)
  (define (upath-sub b e)
    (substring upath b e))
  (add-standard-headers M)
  (M #:set-reply-status:success)
  (M #:add-header #:Connection "close")
  (M #:add-header #:Content-Type "text/html")
  (let ((last-char-idx (1- (string-length upath))))
    (and (< 0 last-char-idx)
         (char=? #\/ (string-ref upath last-char-idx))
         (set! upath (upath-sub 0 last-char-idx))))
  (M #:add-content
     (~page/titled
      (list "Directory " upath)
      (~h2 "Parent Directories")
      (let ((acc (accumulator)))
        (let loop ((start 0))
          (cond ((string-index upath #\/ start)
                 => (lambda (cut)
                      (set! cut (1+ cut))
                      (acc (cons start cut))
                      (loop cut)))
                (else
                 (~table
                  (map (lambda (x)
                         (~tr (~td "")
                              (~td (~a 'href (upath-sub 0 (cdr x))
                                       (upath-sub (car x) (cdr x))))))
                       (acc)))))))
      (~hr)
      (~h2 "Directory " (basename upath))
      (let* ((? vector-ref)
             (subd "(subdir)")
             (raw (filtered-files-in-vicinity
                   dir (lambda (fn)
                         (and (not-dot-not-dotdot fn)
                              (let* ((si (stat (in-vicinity dir fn)))
                                     (mt (strftime "%F %T"
                                                   (localtime (stat:mtime si)))))
                                (if (eq? 'directory (stat:type si))
                                    (vector
                                     subd
                                     ""
                                     (string-append fn "/")
                                     mt)
                                    (vector
                                     (cond ((regexp-exec RX:suffix:gz fn)
                                            => (lambda (m)
                                                 (name->type (match:prefix m))))
                                           (else
                                            (name->type fn)))
                                     (number->string (stat:size si))
                                     fn mt)))))))
             (all (sort raw (lambda (a b)
                              (cond ((let ((a-dir? (eq? subd (? a 0)))
                                           (b-dir? (eq? subd (? b 0))))
                                       (or (and a-dir?
                                                b-dir?)
                                           (and (not a-dir?)
                                                (not b-dir?))))
                                     (string<? (? a 2) (? b 2)))
                                    (else
                                     (eq? subd (? a 0))))))))
        (~table (map (lambda (type size fn mtime)
                       (~tr (~td "")
                            (~td/right size)
                            (~td mtime)
                            (~td type)
                            (~td (~a 'href (in-vicinity upath fn) fn))))
                     (map (lambda (x) (? x 0)) all)
                     (map (lambda (x) (? x 1)) all)
                     (map (lambda (x) (? x 2)) all)
                     (map (lambda (x) (? x 3)) all))))))
  (M #:rechunk-content (* 16 1024))
  (M #:send-reply))

;; Transfer a file.
;;
(define (transmit-file M filename)
  (add-standard-headers M)
  (M #:set-reply-status:success)
  (M #:add-header #:Connection "close")
  (cond ((regexp-exec RX:suffix:gz filename)
         => (lambda (m)
              (M #:add-header #:Content-Encoding "x-gzip")
              (M #:add-header #:Content-Type (name->type (match:prefix m)))))
        (else
         (M #:add-header #:Content-Type (name->type filename))))
  (let* ((p (open-input-file filename))
         (si (stat p))
         (len (stat:size si)))
    (M #:add-header #:Last-Modified (UTCS:rfc1123-date<- #f (UTCS:<-mtime p)))
    (M #:add-direct-writer len
       (lambda (out-port)
         (sendfile/never-fewer (fileno out-port) (fileno p) 0 len)))
    (let ((rv (M #:send-reply)))
      (close-port p)
      rv)))

;; Dynamic URLs ======================================================

;; This is the list of registered dynamic handlers.  The car of the
;; association is a compiled regexp, the cdr a vector in the form
;; #(RE-STR HANDLER).  We keep RE-STR (uncompiled regexp) around in
;; order to support dynamic reloading of the config file.

(define *dynamic-url-handlers* '())

(define (add-dynamic-handler! re-str handler)
  ;; If RE-STR is already registered, update it w/ HANDLER.  Otherwise,
  ;; append a new pair (RX . #(RE-STR HANDLER)) to the global alist.
  (let loop ((ls *dynamic-url-handlers*))
    (cond ((null? ls)
           (set! *dynamic-url-handlers*
                 (append!               ; maintain order
                  *dynamic-url-handlers*
                  (acons (make-regexp re-str)
                         (vector re-str handler)
                         '()))))
          ((string=? (vector-ref (cdar ls) 0) re-str)
           (vector-set! (cdar ls) 1 handler))
          (else
           (loop (cdr ls))))))

(define (remove-dynamic-handler! re-str)
  (let loop ((ls *dynamic-url-handlers*))
    (cond ((null? ls))
          ((string=? (vector-ref (cdar ls) 0) re-str)
           (set! *dynamic-url-handlers*
                 (delq (car ls) *dynamic-url-handlers*)))
          (else
           (loop (cdr ls))))))

;; Return a dynamic handler suitable for `upath', or #f if non found.
;;
(define (find-dynamic-url-handler upath)
  (let loop ((ls *dynamic-url-handlers*))
    (cond ((null? ls) #f)
          ((regexp-exec (car (car ls)) upath) (vector-ref (cdr (car ls)) 1))
          (else (loop (cdr ls))))))

(define (now-timestamp-string)
  (strftime "[%F %T]: " (localtime (current-time))))

;; Main program ======================================================

(define (make-server-loop no-access? concurrency boredom)

  (define (spewer method)
    (lambda (M upath headers in-port)
      (and boredom (alarm boredom))
      (and (eq? 'HEAD method) (M #:inhibit-content! #t))
      (cond ((find-dynamic-url-handler upath)
             => (lambda (handle)
                  (handle M in-port upath headers)))
            (else
             (let ((filename (fs-name upath)))
               (cond ((not filename)
                      (send-not-found M upath))
                     ((no-access? filename)
                      (send-forbidden M upath))
                     ((file-is-directory? filename)
                      ((send-generated-directory-index-box)
                       M upath filename))
                     (else
                      (transmit-file M filename))))
             (catch
              'system-error
              (lambda ()
                (shutdown in-port 2)
                #t)
              (lambda (key who fmt args errno)
                ;; Ignore ENOTCONN "Transport endpoint is not connected".
                (or (= ENOTCONN (car errno))
                    (let ((s (fs "~AInternal error on `~A': ~A~%"
                                 (now-timestamp-string) who
                                 (apply fs fmt args))))
                      (display s *log-port*)
                      (fse "~A" s)
                      (raise SIGTERM)
                      #f))))))))

  (send-generated-directory-index-box send-generated-directory-index)
  (and boredom (alarm boredom))
  (make-big-dishing-loop
   #:socket-setup `((,SO_REUSEADDR . 1)
                    (,SO_LINGER 1 . 60))
   #:concurrency concurrency
   #:need-headers #t
   #:need-input-port #t
   #:method-handlers (map (lambda (m)
                            (cons m (spewer m)))
                          '(HEAD GET))
   #:status-box-size 2
   #:bad-request-handler send-bad-request
   #:unknown-http-method-handler send-unknown-method
   #:log (log-http-response-proc *log-port*)))

(define (log-timestamped s . args)
  (or (eq? (current-output-port) *log-port*)
      (let ((flog (make-fso *log-port*)))
        (flog "~A" (now-timestamp-string))
        (apply flog s args)
        (flog "~%")
        (force-output *log-port*))))

(define (close-socket sig listening-port)
  (log-timestamped "Shutdown (~A)" sig)
  (and (= PF_UNIX (car listening-port))
       (delete-file (list-ref listening-port 2))))

(define (as-daemon pidfile dish listening-port)
  (daemonize ->bool (lambda (bye)
                      (define (finish sig)
                        (sigaction (variable-ref (builtin-variable sig))
                                   (lambda ignored
                                     (close-socket sig listening-port)
                                     (bye sig))))
                      ;; re-open log port
                      (let ((fn (port-filename *log-port*)))
                        (and (file-exists? fn)
                             (set! *log-port* (open-file fn "al"))))
                      (finish 'SIGTERM)
                      (finish 'SIGHUP)
                      (log-timestamped "Restart (pid ~A pidfile ~S)"
                                       (getpid) pidfile)
                      (dish listening-port))
             pidfile
             listening-port))

(define (main/qop qop)
  (let ((parent-pid (getpid))
        (docroot (qop 'docroot (lambda (dir) (in-vicinity dir ""))))
        (listening-port (or (qop 'port (lambda (s)
                                         (cond ((string-match "^unix:" s)
                                                => (lambda (m)
                                                     (list PF_UNIX
                                                           AF_UNIX
                                                           (match:suffix m))))
                                               (else
                                                (list PF_INET
                                                      AF_INET
                                                      INADDR_ANY
                                                      (string->number s))))))
                            (list PF_INET
                                  AF_INET
                                  INADDR_ANY
                                  8001))))
    (set! fs-name (upath->filename-proc
                   docroot '("index.shtml" "index.html")))
    (let ((fn (in-vicinity (or (qop 'ulibdir)
                               (in-vicinity (getenv "HOME") ".sizzweb.d"))
                           "servlets.scm")))
      (sigaction SIGINT (lambda ignored
                          (false-if-exception (load fn))))
      (set! load-servlets! (lambda () (kill parent-pid SIGINT))))
    (set! *log-port* (cond ((qop 'logfile)
                            => (lambda (logfile)
                                 (open-file logfile "al")))
                           ((qop 'daemon)
                            (open-output-file *null-device*))
                           (else
                            (current-output-port))))
    (cond ((file-exists? *system-mime-types*)
           (reset-mime-types! 491)
           (put-mime-types-from-file! 'quail *system-mime-types*)))
    (load-servlets!)
    (let ((dish (make-server-loop (access-forbidden?-proc
                                   docroot (regexp-quote "/../"))
                                  (and (qop 'funky) #:new-process)
                                  (and=> (and=> (qop 'boredom) string->number)
                                         (lambda (b)
                                           (sigaction SIGALRM (lambda ignored
                                                                (raise SIGHUP)))
                                           (max 15 b))))))
      (cond ((qop 'daemon)
             => (lambda (pidfile)
                  (as-daemon pidfile dish listening-port)))
            (else
             (log-timestamped "Restart (pid ~A)" parent-pid)
             (fso "Starting pid ~A on port ~A, with docroot ~S.~%"
                  parent-pid listening-port docroot)
             (call-with-current-continuation
              (lambda (done)
                (define (finish sig)
                  (sigaction (variable-ref (builtin-variable sig))
                             (lambda ignored
                               (fso "(got ~A)~%" sig)
                               (close-socket sig listening-port)
                               (done #t))))
                (finish 'SIGQUIT)
                (finish 'SIGTERM)
                (dish listening-port)))
             (fso "Shutting down~%")
             #t)))))

(define (main args)
  (check-hv args `((package . "ttn-do")
                   (version . ,*sizzweb-version*)
                   (help . commentary)))
  (main/qop
   (qop<-args
    args (let ((valid-dir (lambda (file)
                            (define (bad msg)
                              (fso "~A: ~A: ~A~%" (car args) msg file)
                              #f)
                            (and (or (access? file R_OK)
                                     (bad "cannot read"))
                                 (or (file-is-directory? file)
                                     (bad "not a directory"))))))
           `((funky)
             (boredom (single-char #\b) (value #t))
             (port    (single-char #\p) (value #t))
             (docroot (single-char #\r) (value #t) (required? #t)
                      (predicate ,valid-dir))
             (ulibdir (single-char #\u) (value #t)
                      (predicate ,valid-dir))
             (daemon  (single-char #\d) (value #t))
             (logfile (single-char #\l) (value #t)))))))

;;; sizzweb.scm ends here