{-
This is the module ByteCode.Compile re-written to compile for Yhc Core instead of Pos Lambda.
-}
module ByteCode.CompileYhcCore where
import ByteCode.Type
import ByteCode.Metric
import Flags
import IntState hiding (getIntState, getFlags)
import Control.Monad.State
import Control.Monad(when)
import Core.Convert
import qualified Data.Map as Map
import Util.Extra(Pos, noPos)
import qualified Data.Set as Set
import Yhc.Core
import StrPos
import Prim
import Data.Maybe
import Data.List(isPrefixOf)
import ForeignCode
import TokenId
import SysDeps(unpackPS)
import NT
import Util.Extra
import Syntax(CallConv(..))
---------------------------------------------------------------------------------------------------------------
-- | a variable, just to save typing space
type Var = CoreVarName
-- | The state of the compilation process
data CState = CState {
csFlags :: Flags, -- ^ the overal compilation flags
-- FIXME: this probably isn't the right way to handle imports, instead we likely need to
-- reparse all the .hi files ... blurk!
csImports :: Map.Map String CoreImport, -- ^ import definitions
-- function level compiling
csConsts :: Map.Map ConstItem CRef, -- ^ the current constant table items
csThisFunc :: String, -- ^ the name of the current func (for debugging the compiler)
csNextConst :: CRef, -- ^ the next available constant reference
csNextLabel :: Label, -- ^ the next available label
csMaxDepth :: Int, -- ^ the maximum stack depth encountered
csIns :: [UseIns], -- ^ the outputted instructions
-- block level compiling
csEnv :: Map.Map Var Where, -- ^ a mapping from variables to arg\/stack locations
csDepth :: Int, -- ^ the current stack depth
csEvals :: Set.Set Var, -- ^ the set of already evaluated variables
csFails :: [(Label,Int)] -- ^ the stack of failure labels and stack depths
}
-- | compiler mode (strict or lazy)
data CMode = Strict | Lazy deriving (Eq,Show)
-- | where a variable is stored
data Where = Stack Int -- ^ on the stack
| Arg Int -- ^ as an argument
deriving Show
-- | A case pattern
{-
data Pattern = PatCon CoreCtorName [Var] -- ^ a constructor application with some variables
| PatInt Int -- ^ an integer (or character)
| PatDefault Var -- ^ a default with a variable (can be '_')
deriving Show
-}
-- | A monad for compiling
type Compiler a = State CState a
---------------------------------------------------------------------------------------------------------------
-- | Compile Yhc.Core into bytecode, top-level function
bcCompile :: Flags -- ^ compiler flags
-> IntState -- ^ internal state
-> [CoreImport] -- ^ items imported into core
-> Core -- ^ core to compile
-> BCModule -- ^ compiled bytecode
bcCompile flags state imports core = BCModule (getModuleId state) decls
where
(decls,cs') = runState (cCore core) cs
protos = Map.fromList $ map (\imp -> (coreImportName imp,imp)) $ imports ++ coreToImports core
cs = CState flags protos Map.empty "_none_" 0 0 0 [] Map.empty 0 Set.empty []
-- | convert a core to a list of imports of the items within that core
coreToImports :: Core -> [CoreImport]
coreToImports core = ictors ++ ifuncs
where
ictors = concatMap dataToCore $ coreDatas core
ifuncs = map funcToCore $ coreFuncs core
dataToCore dat = map (\ctor -> CoreImportCtor (coreCtorName ctor) dat) $ coreDataCtors dat
funcToCore fun = CoreImportFunc (coreFuncName fun) (coreFuncArity fun)
---------------------------------------------------------------------------------------------------------------
-- top-level compilation
---------------------------------------------------------------------------------------------------------------
-- | Compile the complete Yhc.Core into bytecode declarations
cCore :: Core -> Compiler [BCDecl]
cCore core = do
core <- return $ reduceRecursiveLet core
fdecls <- mapM cFunc (coreFuncs core)
cdecls <- mapM cData (coreDatas core)
return $ concat $ fdecls ++ cdecls
-- | Compile a Data declaration into bytecode declarations
cData :: CoreData -> Compiler [BCDecl]
cData cdata = zipWithM cCtor (coreDataCtors cdata) [0..]
-- | Compile a single constructor into a declaration
cCtor :: CoreCtor -- ^ the constructor to compile
-> Int -- ^ the tag of the constructor
-> Compiler BCDecl -- ^ the geenrated declaration
cCtor ctor tag = do
let arity = length $ coreCtorFields ctor
return (Con (coreCtorName ctor) noPos arity tag)
-- | Compile a function declaration into a bytecode declaration
cFunc :: CoreFunc -> Compiler [BCDecl]
cFunc func
| isCorePrim func = cPrim func
| otherwise = do
modify $ \ cs -> cs { csThisFunc = coreFuncName func }
let args = coreFuncArgs func
(ins,cstate) <- makePure $ cBody (remCorePos $ coreFuncBody func) args
let maxDepth = csMaxDepth cstate
consts = Map.fromList $ map (\(x,y) -> (y,x)) $ Map.toList (csConsts cstate)
pos = noPos
return [ Fun (coreFuncName func) pos (length args) args (CLinear ins) consts False maxDepth 0 [] ]
-- | compile the body of the a function and its arguments to bytecode instructions
cBody :: CoreExpr -> [Var] -> Compiler [UseIns]
cBody exp@(CoreCase (CoreVar v) [(PatCon c vs, CoreVar v2)]) args@[arg] = do
only <- isOnlyCon c
if only && v2 `elem` vs then do
let no = fromJust $ lookup v2 (zip vs [0..])
selUS = UseSet 0 [] (Set.singleton arg)
code = [(NEED_HEAP 1,emptyUS), (SELECTOR_EVAL,selUS), (SELECT no,emptyUS), (RETURN,emptyUS)]
return code
else
cBody' exp args
cBody exp args = cBody' exp args
-- | compile the body of a function that is definitely not a selector
cBody' :: CoreExpr -> [Var] -> Compiler [UseIns]
cBody' exp args = do
zipWithM_ (\a n -> bind a (Arg n)) args [0..]
cExpr Strict exp
emit RETURN
getIns
-- | compile a primitive function
cPrim :: CoreFunc -> Compiler [BCDecl]
cPrim prim = do
let name = coreFuncName prim
xname = name ++ "#X"
arity = corePrimArity prim
cname = corePrimExternal prim
cconv = corePrimConv prim
types = corePrimTypes prim
ext name = External name noPos arity cname cconv types
if cconv == "builtin" then
return [ext name]
else do
let args = [0..arity-1]
argNames = map show args
evals = concat $ zipWith evalArg args argNames
code = evals ++ [ (PRIMITIVE, UseSet 0 argNames (Set.fromList argNames)),
(EVAL,emptyUS),
(RETURN,emptyUS) ]
consts = Map.singleton 0 (CGlobal xname GFUN)
wrapper = Fun name noPos arity [] (CLinear code) consts True 1 0 []
evalArg i v = [ (PUSH_ARG i, UseSet 0 [v] (Set.singleton v)), (EVAL,emptyUS), (POP 1,emptyUS) ]
return [wrapper, ext xname]
-- | decide whether a constructor is the only constructor in the datatype
isOnlyCon :: CoreCtorName -> Compiler Bool
isOnlyCon c = do
dat <- lookupImportData c
return $ length (coreDataCtors dat) == 1
---------------------------------------------------------------------------------------------------------------
-- expression compilation
---------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------
-- | compile an expression (either strictly or lazily)
cExpr :: CMode -> CoreExpr -> Compiler ()
-- a simple zero arity constructor
cExpr mode (CoreCon name) = do
pushConst (CGlobal name GZCON)
-- a simple variable
cExpr mode (CoreVar name) = do
pushVar name
evalVar mode name
-- a function (with no arguments applied)
cExpr mode (CoreFun name) = do
arity <- lookupImportArity name
if arity == 0 then
pushConst (CGlobal name GCAF)
else
pushConst (CGlobal name GFUN0)
eval mode
-- the special SEQ primitive
cExpr mode exp@(CoreApp (CoreFun "SEQ") [x,y]) = do
cExpr Strict x
emit (POP 1)
cExpr mode y
-- application to no arguments
cExpr mode exp@(CoreApp f []) = cExpr mode f
-- an application to a function/constructor
cExpr mode exp@(CoreApp f as) = do
let (f',as') = flattenCoreApp exp
num = length as'
case f' of
CoreCon c -> do
cArgs Lazy as'
ci <- addConst (CGlobal c GCON)
emit (MK_CON ci num)
CoreFun f ->
case primForFunc f of
Nothing -> do
cArgs Lazy as'
arity <- lookupImportArity f
cCallGlobal mode f num arity
Just i -> do
cArgs Strict as'
emit i
CoreVar v -> do
cArgs Lazy as'
pushVar v
emit (APPLY num)
eval mode
CoreCase _ _ -> if mode /= Strict then error "cExpr: CoreApp to CoreCase in non-strict context!"
else do
cArgs Lazy as'
cExpr Strict f'
emit (APPLY num)
emit (EVAL)
-- a case statement that is really an if
cExpr Strict (CoreCase e as)
| isJust asIf = do
let (tcase,fcase) = fromJust asIf
cExpr Strict e
[false,after] <- newLabels 2
emit (JUMP_FALSE false)
let tbranch = do { cIfBranch tcase True false after }
fbranch = do { cIfBranch fcase False false after }
branchs after "If" [tbranch, fbranch]
emit (LABEL after)
where
asIf = altsAsIf as
-- a case statement
cExpr Strict (CoreCase e as) = do
cExpr Strict e
las <- newLabels (length as)
after <- newLabel
let as' = zipWith (\(p,e) l -> (l,p,e)) as las
(isInt,complete,ts,def) <- decomposeAlts as'
emit (CASE isInt (zip ts las) def)
let calts = map (cAlt after) as'
branchs after "Case" calts
emit (LABEL after)
-- a let statement used for a failure case
cExpr mode exp@(CoreLet [(name,bind)] e)
| isFailure name = do
lfail <- newLabel
lexit <- newLabel
let ce = withFailure lfail $ do { cExpr mode e ; emit (JUMP lexit) ; return True }
cf = do { emit (LABEL lfail) ; cExpr mode bind ; return True }
branchs lexit "Failure" [ce,cf]
emit (LABEL lexit)
-- a let statement
cExpr mode exp@(CoreLet bs e)
| isCoreLetRec exp = do
emit (ALLOC n)
zipWithM_ (\(v,_) n -> bind v (Stack n)) bs [0..]
zipWithM_ (cBinding True) bs [0..]
cExpr mode e
emit (SLIDE n)
| otherwise = do
zipWithM_ (cBinding False) bs [0..]
cExpr mode e
emit (SLIDE n)
where
n = length bs
-- an int
cExpr mode (CoreLit lit) =
case lit of
CoreInt i -> if isShort i then emit (PUSH_INT i)
else pushConst (CInt i)
CoreInteger i -> pushConst (CInteger i)
CoreChr c -> emit (PUSH_CHAR $ fromEnum c)
CoreStr s -> do { pushConst (CString s) ; emit P_STRING }
CoreFloat f -> pushConst (CFloat f)
CoreDouble d -> pushConst (CDouble d)
-------------------------------------------------------------------------
-- | compile the arguments to a function
cArgs :: CMode -> [CoreExpr] -> Compiler ()
cArgs mode args = mapM_ (cExpr mode) (reverse args)
-- | emit instructions to call a global function
cCallGlobal :: CMode -- ^ mode to compile in (strict or lazy)
-> Var -- ^ the name of the function to call
-> Int -- ^ the number of arguments given
-> Int -- ^ the number of arguments expected
-> Compiler ()
cCallGlobal mode f got expect
-- saturated or super-saturated case
| got >= expect = do
let extra = got - expect
-- do a MK_AP or PUSH_CONST depending on expected arity
if expect > 0 then do
fi <- addConst (CGlobal f GFUN)
emit (MK_AP fi expect)
else do
fi <- addConst (CGlobal f GCAF)
emit (PUSH_CONST fi)
-- apply extra arguments if needed
when (extra > 0) $ do
emit (APPLY extra)
-- eval the result if needed
eval mode
-- partial application
| otherwise = do
fi <- addConst (CGlobal f GFUN)
emit (MK_PAP fi got)
-- | compile an if branch
cIfBranch :: CoreExpr -> Bool -> Label -> Label -> Compiler Bool
cIfBranch expr isTrue false after = do
when (not isTrue) $ do
emit (LABEL false)
let failure = isCoreVar expr && isFailure (fromCoreVar expr)
if failure then cFail
else do
cExpr Strict expr
when isTrue $ emit (JUMP after)
return True
-- | an internal alternative
type Alt = (Label,CorePat,CoreExpr)
-- | compile an alternative
cAlt :: Label -> -- the label for the end of the case statement
Alt -> -- the label for the start, the pattern and the expression
Compiler Bool -- returns whether result was normal exit (non-failure)
cAlt after (label,pat,expr) = do
emit (LABEL label)
let failure = isCoreVar expr && isFailure (fromCoreVar expr)
if failure then do
cFail
else do
case pat of
PatCon c vs -> do
let n = length vs
emitUse (UNPACK n) vs Set.empty
zipWithM_ (\v n -> bind v (Stack n)) vs [0..]
cExpr Strict expr
emit (SLIDE n)
_ -> do
emit (POP 1)
cExpr Strict expr
emit (JUMP after)
return True
-- | compile a let binding (either for recursive or non-recursive let)
cBinding :: Bool -> (CoreVarName,CoreExpr) -> Int -> Compiler ()
cBinding rec (v,e) n = do
cExpr Lazy e
if rec then do
emitUse (UPDATE n) [] (Set.singleton v)
else
bind v (Stack 0)
-- | compile a failure
cFail :: Compiler Bool
cFail = do
(lfail,fDepth) <- getFailure
depth <- getDepth
let err = error $ "cFail: failure: depth = "++show depth++", fail depth = "++show fDepth
num = if depth < fDepth then err else depth - fDepth
emit (POP num)
emit (JUMP lfail)
return False
-- | return the special primitive instruction associated with a function, or Nothing
primForFunc :: CoreFuncName -> Maybe Ins
primForFunc fun = lookup fun prims
where
ops = [ (ADD,P_ADD), (SUB,P_SUB), (MUL,P_MUL), (SLASH,P_DIV), (CMP_EQ,P_CMP_EQ), (CMP_NE,P_CMP_NE),
(CMP_LE,P_CMP_LE), (CMP_LT,P_CMP_LT), (CMP_GE,P_CMP_GE), (CMP_GT,P_CMP_GT), (NEG,P_NEG) ]
others = [ (ORD,P_FROM_ENUM), (STRING,P_STRING), (QUOT,P_DIV OpWord), (REM,P_MOD OpWord) ]
prims = map prim $ [ (p k,i k) | (p,i) <- ops, k <- [OpWord,OpFloat,OpDouble] ] ++ others
prim (prim,ins) = (strPrim prim,ins)
-- | turn a nested application of CoreApp into a function and list of arguments
flattenCoreApp :: CoreExpr -> (CoreExpr,[CoreExpr])
flattenCoreApp (CoreApp f as) = let (f',as') = flattenCoreApp f in (f',as'++as)
flattenCoreApp f = (f,[])
-- | decompose a list of alternatives into (int-case,complete,tags,default)
-- where int-case is true if this is an int-case, complete is true if the case is complete,
-- tags is the list of constructor tag numbers and default is the default case (if there is one)
decomposeAlts :: [Alt] -> Compiler (Bool,Bool,[Tag],Maybe Label)
decomposeAlts as =
case ndefs of
(_,PatLit c,e):_ -> do
let tagof (PatLit c) = case c of { CoreInt i -> i ; CoreChr c -> fromEnum c }
tags = map (\(_,p,_) -> tagof p) ndefs
return (True,False,tags,ldef)
(_,PatCon c vs,e):_ -> do
dat <- lookupImportData c
let constrs = map coreCtorName $ coreDataCtors dat
ncons = zip constrs [0..]
complete = length as == length constrs
tags = map (\(_,PatCon c _, _) -> fromJust $ lookup c ncons) ndefs
return (False,complete,tags,ldef)
where
(ndefs,defs) = break (\(_,p,_) -> isPatDefault p) as
ldef = case defs of
[] -> Nothing
(label,_,_):_ -> Just label
-- | decides whether a list of core alternatives are really an if
altsAsIf :: [(CorePat,CoreExpr)] -> Maybe (CoreExpr,CoreExpr)
altsAsIf alts =
case boolalts of
[] -> Nothing -- this is not a boolean case
[a] -> let (abool,aexpr) = a
in if abool then Just (aexpr,def)
else Just (def,aexpr)
[a,b] -> let (abool,aexpr) = a
(bbool,bexpr) = b
in if abool then Just (aexpr,bexpr)
else Just (bexpr,aexpr)
_ -> error $ "altsAsIf: more than 2 alternatives for something that is casing over booleans!?"
where
boolalts = concat $ map asBooleanPat alts
def = head [ e | (PatDefault,e) <- alts ]
asBooleanPat (pat,expr) = case pat of
PatCon "Prelude;True" [] -> [(True,expr)]
PatCon "Prelude;False" [] -> [(False,expr)]
_ -> []
-- | introduce instructions to push a constant
pushConst :: ConstItem -> Compiler ()
pushConst c = do
i <- addConst c
emit (PUSH_CONST i)
-- | push a variable on the stack
pushVar :: Var -> Compiler ()
pushVar v = do
whr <- whereIsVar v
case whr of
Arg n -> emitUse (PUSH_ARG n) [v] (Set.singleton v)
Stack n -> emitUse (PUSH n) [v] (Set.singleton v)
---------------------------------------------------------------------------------------------------------------
-- monadic helpers
---------------------------------------------------------------------------------------------------------------
-- | get the instructions
getIns :: Compiler [UseIns]
getIns = gets (reverse . csIns)
-- | get the current depth
getDepth :: Compiler Int
getDepth = gets csDepth
-- | emit an instruction
emit :: Ins -> Compiler ()
emit i = emitUse i [] Set.empty
-- | emit an instruction, with usage information
emitUse :: Ins -> [Var] -> Set.Set Var -> Compiler ()
emitUse i give need = do
let d = imStack $ bcodeMetric i
shiftStack d
depth <- gets csDepth
let ius = (i, UseSet depth give need)
modify $ \ cs -> cs { csIns = ius : csIns cs }
-- | shift all the (Stack n) variables on the stack, also update the depth
shiftStack :: Int -> Compiler ()
shiftStack n = modify $ \ cs ->
let depth = csDepth cs + n
maxDepth = max (csMaxDepth cs) depth
env = Map.map shiftWhere (csEnv cs)
in cs { csEnv = env, csDepth = depth, csMaxDepth = maxDepth }
where
shiftWhere (Stack x) = Stack (x+n)
shiftWhere w = w
-- | evaluate a variable if it hasn't been evaluated already
evalVar :: CMode -> Var -> Compiler ()
evalVar Lazy v = return ()
evalVar Strict v = do
evaled <- gets $ \ cs -> v `Set.member` (csEvals cs)
when (not evaled) $ do
emit EVAL
modify $ \ cs -> cs { csEvals = Set.insert v (csEvals cs) }
-- | emit an EVAL instruction, if the mode is strict
eval :: CMode -> Compiler ()
eval Lazy = return ()
eval Strict = emit EVAL
-- | takes a compiler function and changes it so that it doesn't actually modify the state
makePure :: Compiler a -> Compiler (a,CState)
makePure f = do
cs <- get
return (runState f cs)
-- | takes a compiler and executes it as a new function body
asNewFunc :: Compiler a -> Compiler a
asNewFunc f = do { cs <- get ; x <- f ; modify (const cs) ; return x }
-- | take a list of compilers for different branches, run them all and then merge the results
-- the compiler returns a bool indicating whether it returns normally, if it doesn't return normally
-- then it doesn't need to have a depth matching the other branchs.
branchs :: Label -> String -> [Compiler Bool] -> Compiler ()
branchs label debug cs = do
mcss <- mapM inNewEnv cs
modify $ \ cs ->
let css = catMaybes mcss
evs = csEvals cs `Set.union` (intersectManySets $ map csEvals css)
dps = map csDepth css
cs' = cs { csEvals = evs, csDepth = head dps }
in if dps == [] then cs
else if all (== (head dps)) dps then cs'
else error (csThisFunc cs' ++": L_"++show label++": "++debug++" depths not all equal"++show dps)
where
inNewEnv c = do
cs <- get
x <- c
State $ \ cs' -> (if x then Just cs' else Nothing, cs' { csEnv = csEnv cs, csDepth = csDepth cs, csEvals = csEvals cs })
{-
-- | take a compiler for a branching operation and execute the compiler recording the stack depth on the way out
branch :: Compiler () -> Compiler Int
branch c = do
cs <- get
c
State $ \ cs' -> let cs'' = cs' { csEnv = csEnv cs, csFails = csFails cs, csEvals = csEvals cs, csDepth = csDepth cs }
in (csDepth cs', cs'')
-- | merge together a list of depths taken from branching, checks they are all the same
-- and sets the depth to that
mergeDepths :: Label -> String -> [Int] -> Compiler ()
mergeDepths lab kind (i:is)
| and (map (==i) is) = modify $ \ cs -> cs { csDepth = i }
| otherwise = trace ("L_" ++ show lab++":"++kind++" depths don't match on merge depths "++show (i:is)) $ return ()
-}
-- | bind a variable to a location
bind :: Var -> Where -> Compiler ()
bind var wh = modify $ \ cs -> cs { csEnv = Map.insert var wh (csEnv cs) }
-- | add a constant to the constant table (if not present) and return its reference
addConst :: ConstItem -> Compiler CRef
addConst item = do
old <- gets $ \ cs -> Map.lookup item (csConsts cs)
case old of
Just ref -> return ref
Nothing -> State $ \ cs -> let i = csNextConst cs
cs' = cs { csConsts = Map.insert item i (csConsts cs), csNextConst = i+1 }
in (i,cs')
-- | allocate a new label
newLabel :: Compiler Label
newLabel = State $ \ cs -> let i = csNextLabel cs
in (i, cs { csNextLabel = i+1 })
-- | allocate several new labels
newLabels :: Int -> Compiler [Label]
newLabels n = replicateM n newLabel
-- | run a compiler in a new failure block
withFailure :: Label -> Compiler a -> Compiler a
withFailure lfail comp = do
modify $ \ cs -> cs { csFails = (lfail, csDepth cs) : csFails cs }
x <- comp
modify $ \ cs -> cs { csFails = tail (csFails cs) }
return x
-- | get the most current failure
getFailure :: Compiler (Label,Int)
getFailure = gets $ \ cs -> case csFails cs of
(f:_) -> f
[] -> error "getFailure: fail called but no failure closure present!"
-- | return where a variable can be found
whereIsVar :: Var -> Compiler Where
whereIsVar v = gets $ \ cs ->
case Map.lookup v (csEnv cs) of
Just w -> w
Nothing -> error $ "whereIsVar: no variable "++v++" in environment"
-- | lookup an import
lookupImport :: String -> Compiler CoreImport
lookupImport name = gets $ \ cs ->
case Map.lookup name (csImports cs) of
Just imp -> imp
Nothing -> error $ "Compiling Yhc.Core to bytecode: the name '"++name++
"' is referenced but is neither local nor specified as an import"
-- | lookup a data constructor
lookupImportData :: String -> Compiler CoreData
lookupImportData name = do
imp <- lookupImport name
case imp of
CoreImportCtor{} -> return (coreImportCtorData imp)
_ -> error $ "Compiling Yhc.Core to bytecode: the name '"++name++"' is used as a constructor, but it isn't one"
-- | lookup the arity of a name
lookupImportArity :: String -> Compiler Int
lookupImportArity name = do
imp <- lookupImport name
case imp of
CoreImportFunc{} -> return $ coreImportFuncArity imp
CoreImportCtor{} ->
let ctors = coreDataCtors $ coreImportCtorData imp
in case lookup name $ map (\ctor -> (coreCtorName ctor,ctor)) ctors of
Just ctor -> return $ length $ coreCtorFields ctor
-- | test whether an expression a failure variable
isFailure :: Var -> Bool
isFailure = ("v_fail_" `isPrefixOf`)
-- | calculate the intersection of many sets (analogous to unionManySets)
intersectManySets :: Ord a => [Set.Set a] -> Set.Set a
intersectManySets [] = Set.empty
intersectManySets xs = foldr1 Set.intersection xs