Plan 9 from Bell Labs’s /usr/web/sources/contrib/fernan/nhc98/src/greencard/HandLex.hs

Copyright © 2021 Plan 9 Foundation.
Distributed under the MIT License.
Download the Plan 9 distribution.


module HandLex
  ( gcLex	-- :: String -> [Token]
  , Posn(..)
  , TokenT(..)
  , Token
  ) where

import Char

#if !defined(__HASKELL98__)
#define isAlphaNum isAlphanum
#endif

type Token = (Posn, TokenT)

data Posn = Pn Int Int		-- line and column
        deriving Eq

instance Show Posn where
      showsPrec p (Pn l c) = showString "line " . shows l .
                             showString " col " . shows c

data TokenT = TokFun
      | TokCall
      | TokResult
      | TokFail
      | TokConst
      | TokDis
      | TokPrefix
      | TokOpen
      | TokClose
      | TokCurOpen
      | TokCurClose
      | TokSqOpen
      | TokSqClose
      | TokAngOpen
      | TokAngClose
      | TokAng2Open
      | TokAng2Close
      | TokSlash
      | TokComma
      | TokEqual
      | TokArrow
      | TokCoCo
      | TokDeclare
      | TokIn
      | TokName     String
      | TokDisName  String
      | TokBaseTy   String
      | TokHaskell  String
      | TokC        String
      | TokCExp     String
      | TokCCode   [String]
      deriving (Eq,Show)

trim, revtrim :: String -> String
trim    = f . f         where f = reverse . dropWhile isSpace
revtrim = f.reverse.f   where f = dropWhile isSpace

emit :: TokenT -> Posn -> Token
emit tok p = (p,tok)

lexerror :: Posn -> String -> a
lexerror p s = error ("Lexical error: "++s++" at "++show p++"\n")

addcol :: Int -> Posn -> Posn
addcol n (Pn r c) = Pn r (c+n)

newline, tab :: Posn -> Posn
newline (Pn r c) = Pn (r+1) 0
tab     (Pn r c) = Pn r (((c`div`8)+1)*8)

----
gcLex :: String -> [Token]
gcLex = lextop gcAny (Pn 1 0) . lines

lextop :: (Posn->String->[String]->[Token]) -> Posn -> [String] -> [Token]
lextop k p     []        = []
lextop k p (('%':s):ss)  = gcStart k (addcol 1 p) s ss
lextop k p (s:ss)        = emit (TokHaskell s) p :
                           lextop k (newline p) ss

gcStart :: (Posn->String->[String]->[Token]) ->
            Posn -> String -> [String] -> [Token]
gcAny, gcConst               :: Posn ->  String  -> [String] -> [Token]
gcUser             :: String -> Posn ->  String  -> [String] -> [Token]
gcLine :: ([String]->TokenT) -> Posn ->  String  -> [String] -> [Token]

gcStart k p s ss
    | take 1 s == "-"        =                     skip 1 p s ss gcC
    | take 1 s == "C"        =                     skip 1 p s ss (blank gcC)
    | take 3 s == "fun"      = emit TokFun p:      skip 3 p s ss gcAny
    | take 3 s == "dis"      = emit TokDis p:      skip 3 p s ss gcAny
    | take 4 s == "call"     = emit TokCall p:     skip 4 p s ss gcAny
    | take 4 s == "fail"     = emit TokFail p:     skip 4 p s ss gcAny
    | take 4 s == "code"     =                   skip 4 p s ss (gcLine TokCCode)
    | take 5 s == "const"    = emit TokConst p:    skip 5 p s ss gcConst
    | take 6 s == "result"   = emit TokResult p:   skip 6 p s ss gcAny
    | take 6 s == "prefix"   = emit TokPrefix p:   skip 6 p s ss gcAny
    | otherwise              = k p s ss

skip :: Int -> Posn -> String -> [String] ->
                                 (Posn->String->[String]->[Token]) -> [Token]
skip n p s ss k = k (addcol n p) (drop n s) ss

blank :: (Posn->String->[String]->[Token]) ->
          Posn -> String -> [String] -> [Token]
blank k p    []    ss  = lextop (blank k) (newline p) ss
blank k p ['\^M']  ss  = lextop (blank k) (newline p) ss
blank k p (' ': s) ss  = blank k (addcol 1 p) s ss
blank k p ('\t':s) ss  = blank k (tab p) s ss
blank k p    s     ss  = k p s ss

gcC p s ss  = emit (TokC s) p : lextop gcAny (newline p) ss

gcAny = blank gcAny'
  where
    gcAny' p ('"':s)  ss  =                      gcCExp "" (addcol 1 p) s ss
    gcAny' p ('{':s)  ss  = emit TokCurOpen p:   gcAny (addcol 1 p) s ss
    gcAny' p ('}':s)  ss  = emit TokCurClose p:  gcAny (addcol 1 p) s ss
    gcAny' p ('(': s) ss  = emit TokOpen p:      gcAny (addcol 1 p) s ss
    gcAny' p (')': s) ss  = emit TokClose p:     gcAny (addcol 1 p) s ss
    gcAny' p ('[': s) ss  = emit TokSqOpen p:    gcAny (addcol 1 p) s ss
    gcAny' p (']': s) ss  = emit TokSqClose p:   gcAny (addcol 1 p) s ss
    gcAny' p ('<': s) ss
      | take 1 s == "<"   = emit TokAng2Open p:  skip 1 p s ss (gcUser [])
      | otherwise         = emit TokAngOpen p:   gcUser [] (addcol 1 p) s ss
    gcAny' p ('>': s) ss  = emit TokAngClose p:  gcAny (addcol 1 p) s ss
    gcAny' p (',': s) ss  = emit TokComma p:     gcAny (addcol 1 p) s ss
    gcAny' p ('=': s) ss  = emit TokEqual p:     gcAny (addcol 1 p) s ss
    gcAny' p s ss
      | take 2 s == "->"      = emit TokArrow p:   skip 2 p s ss gcAny
      | take 2 s == "::"      = emit TokCoCo p:    skip 2 p s ss gcAny
      | take 3 s == "in "     = emit TokIn p:      skip 3 p s ss gcAny
      | take 7 s == "declare" = emit TokDeclare p: skip 7 p s ss gcAny
      | take 2 s == "%%"      = ident TokBaseTy (addcol 2 p) (drop 2 s) ss gcAny
      | ('A'<=h && h<='Z') ||
        ('0'<=h && h<='9') ||
         '_'==h || h=='\'' ||
         '`'==h               = ident TokName    p s ss gcAny
      | ('a'<=h && h<='z')    = ident TokDisName p s ss gcAny
      | otherwise             = lexerror p "unrecognised input"
      where h = head s

gcCExp acc = blank (lit acc)
  where lit acc p ('"':s)     ss = emit (TokCExp (reverse acc)) p:
                                   gcAny (addcol 1 p) s ss
        lit acc p ('%':'"':s) ss = lit ('"':acc) (addcol 2 p) s ss
        lit acc p (h:s)       ss = lit (h:acc) (addcol 1 p) s ss
        lit acc p  []         ss = lexerror p "missing \""
      --lit acc p  []         ss = lextop (lit ('\n':acc)) p ss

gcLine tok p s ss  = multiline tok (p,[trim s]) (newline p) ss

gcConst = blank gcConst1
  where
    gcConst1 p s ss
      | ('A'<=h && h<='Z') ||
        ('0'<=h && h<='9') ||
         '_'==h || h=='\'' ||
         '`'==h            = ident TokName    p s ss gcConst2
      | ('a'<=h && h<='z') = ident TokDisName p s ss gcConst2
      | otherwise          = lexerror p "%const not followed by type or DISname"
      where h = head s
    gcConst2 = blank gcConst3
    gcConst3 p ('[':s) ss = emit TokSqOpen p:  gcAny (addcol 1 p) s ss
    gcConst3 p     s   ss = lexerror p "%const type/DISname not followed by ["


gcUser acc = blank (gcUser' acc)
  where gcUser' acc p ('/':s) ss = emit (TokName (revtrim acc)) p:
                                   emit TokSlash p: gcUser [] (addcol 1 p) s ss
        gcUser' acc p ('>':'>':s) ss
                                 = emit (TokName (revtrim acc)) p:
                                   emit TokAng2Close p: gcAny (addcol 1 p) s ss
        gcUser' acc p ('>':s) ss = emit (TokName (revtrim acc)) p:
                                   emit TokAngClose p:  gcAny (addcol 1 p) s ss
        gcUser' acc p ('-':'>':s) ss = gcUser' ('>':'-':acc) (addcol 2 p) s ss
        gcUser' acc p (h:s)   ss = gcUser' (h:acc) (addcol 1 p) s ss
        gcUser' acc p  []     ss = lextop (gcUser acc) p ss



ident :: (String->TokenT) ->
          Posn -> String -> [String] ->
         (Posn->String->[String]->[Token]) -> [Token]
ident tok p s ss k =
    let (name,s0) = span (\c-> isAlphaNum c || c `elem` "`-_#.'/\\") s
    in emit (tok name) p: skip (length name) p s ss k

multiline :: ([String]->TokenT) ->
             (Posn,[String]) -> Posn -> [String] -> [Token]
multiline tok (p0,s0) p (('%':h:s):ss)
    | isSpace h    = multiline tok (p0, ({-trim-} s):s0) (newline p) ss
    | otherwise    = emit (tok (reverse s0)) p0:  gcStart gcAny (addcol 1 p) (h:s) ss
multiline tok (p0,s0) p ss =
                     emit (tok (reverse s0)) p0:  lextop gcAny p ss


Bell Labs OSI certified Powered by Plan 9

(Return to Plan 9 Home Page)

Copyright © 2021 Plan 9 Foundation. All Rights Reserved.
Comments to [email protected].