/*
* nvidia tegra 2 machine assist, definitions
* dual-core cortex-a9 processor
*
* R9 and R10 are used for `extern register' variables.
* R11 is used by the loader as a temporary, so avoid it.
*/
#include "mem.h"
#include "arm.h"
/*
 * B is for 'botch'.
 * NOTE(review): presumably one of the headers included above defines B,
 * which would get in the way of using B as an instruction name in
 * assembler source -- confirm against mem.h and arm.h.
 */
#undef B
/*
 * address arithmetic.  KSEGM, KZERO, PHYSDRAM, L1 and MACHSIZE are
 * not defined in this file.
 */
/* replace the KSEGM bits of pa with KZERO */
#define KADDR(pa) (((pa) & ~KSEGM) | KZERO)
/* replace the KSEGM bits of va with PHYSDRAM */
#define PADDR(va) (((va) & ~KSEGM) | PHYSDRAM)
/* bits 20 and up of va, masked to 12 bits, scaled by 4 (a byte offset) */
#define L1X(va) ((((va) >> 20) & 0xfff) << 2)
/* MACHSIZE bytes immediately below L1 */
#define MACHADDR (L1 - MACHSIZE) /* only room for cpu0's */
/*
 * L1 pte values.  both describe a Section in Dom0 with L1AP(Krw)
 * permissions; the dram flavour adds L1ptedramattrs on top of that.
 */
#define PTEIO (Dom0|L1AP(Krw)|Section)
#define PTEDRAM (PTEIO|L1ptedramattrs)
#define DOUBLEMAPMBS 512 /* megabytes of low dram to double-map */
/*
 * busy-wait: load mloops*1000000 into R0, then count R0 down to zero.
 * steps on R0 and, because of SUB.S, on the condition codes.
 * `label' is defined inside the expansion, so every use of DELAY
 * needs its own distinct label name.
 * NB: with mloops == 0 the first SUB.S leaves R0 non-zero, so the loop
 * keeps going until R0 wraps around to zero instead of falling through.
 */
#define DELAY(label, mloops) \
MOVW $((mloops)*1000000), R0; \
label: \
SUB.S $1, R0; /* decrement and set condition codes for BNE */ \
BNE label
/*
 * print a byte on the serial console; clobbers R0 & R6; needs R12 (SB) set.
 * stores c to the address PHYSCONS with a word move, with BARRIERS
 * issued both before and after the store.
 * the macro itself does no polling of the device before it stores.
 */
#define PUTC(c) \
BARRIERS; \
MOVW $(c), R0; /* R0 = byte to print */ \
MOVW $PHYSCONS, R6; /* R6 = where to store it */ \
MOVW R0, (R6); \
BARRIERS
/*
 * new instructions.
 * SMC is emitted as a literal instruction word via WORD; the two
 * branch-target-cache flushes are MTCP writes to coprocessor CpSC,
 * C(CpCACHE), C(CpCACHEinvi), differing in the last operand.
 */
#define SMC WORD $0xe1600070 /* secure monitor call; low 4-bits are call # (trustzone) */
/* flush branch-target cache; the register written is PC */
#define FLBTC MTCP CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtc
/* flush one entry of the branch-target cache, va in R0 (cortex) */
#define FLBTSE MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtse
/*
 * arm v7 arch defines these.
 * each one is emitted as a fixed instruction word, so none takes operands.
 */
#define DSB WORD $0xf57ff04f /* data synch. barrier; last f = SY */
#define DMB WORD $0xf57ff05f /* data mem. barrier; last f = SY */
#define ISB WORD $0xf57ff06f /* instr. sync. barrier; last f = SY */
#define WFI WORD $0xe320f003 /* wait for interrupt */
#define NOOP WORD $0xe320f000 /* no-operation */
/*
 * count leading 0s.  s is or'd into the low bits of the instruction word
 * and d is or'd in starting at bit 12.  both are used in a constant
 * expression, so pass register numbers (e.g. 3), not register names.
 * neither is masked, so out-of-range values corrupt the encoding.
 */
#define CLZ(s, d) WORD $(0xe16f0f10 | ((d) << 12) | (s)) /* count leading 0s */
/* set endianness; o is or'd in at bit 9 */
#define SETEND(o) WORD $(0xf1010000 | ((o) << 9)) /* o==0, little-endian */
/*
 * processor-state changes, emitted as literal instruction words.
 * CPSMODE or's its argument into the low bits of the word without
 * masking it; the PsrM* values are not defined in this file.
 */
#define CPSIE WORD $0xf1080080 /* intr enable: zeroes I bit */
#define CPSID WORD $0xf10c00c0 /* intr disable: sets I,F bits */
#define CPSAE WORD $0xf1080100 /* async abt enable: zeroes A bit */
#define CPSMODE(m) WORD $(0xf1020000 | (m)) /* switch to mode m (PsrM*) */
#define CLREX WORD $0xf57ff01f /* clear exclusive; emitted as a literal word */
/*
 * floating point: moves between an FP register and an arm register.
 * in both macros fp is or'd in at bit 16 and cpu at bit 12, unmasked,
 * so pass register numbers rather than register names.
 * note that the argument order differs: VMRS(fp, cpu) but VMSR(cpu, fp).
 */
#define VMRS(fp, cpu) WORD $(0xeef00a10 | (fp)<<16 | (cpu)<<12) /* FP → arm */
#define VMSR(cpu, fp) WORD $(0xeee00a10 | (fp)<<16 | (cpu)<<12) /* arm → FP */
/*
 * a popular code sequence used to write a pte for va is:
 *
 * MOVW R(n), TTB[LnX(va)]
 * // clean the cache line
 * DSB
 * // invalidate tlb entry for va
 * FLBTC
 * DSB
 * PFF (prefetch flush; now ISB)
 *
 * BARRIERS is only the tail of that sequence (FLBTC; DSB; ISB).
 * it does not clean the cache line or invalidate the tlb entry, and
 * it does not include the first DSB; those are up to the caller.
 */
#define BARRIERS FLBTC; DSB; ISB
/*
 * invoked with PTE bits in R2, pa in R3, PTE pointed to by R4.
 * fill PTE pointed to by R4 and increment R4 past it.
 * increment R3 by a MB. clobbers R1.
 *
 * the last line of the body must not end in a backslash: a continuation
 * there makes the definition absorb whatever line follows the macro.
 */
#define FILLPTE() \
ORR R3, R2, R1; /* R1 = pte bits (R2) | pa (R3) */ \
MOVW R1, (R4); /* store the new PTE */ \
ADD $4, R4; /* bump PTE address */ \
ADD $MiB, R3; /* bump pa */
/* zero PTE pointed to by R4 and increment R4 past it. assumes R0 is 0. */
/* nothing in the macro sets R0; whatever R0 holds is what gets stored. */
#define ZEROPTE() \
MOVW R0, (R4); /* store R0 as the PTE */ \
ADD $4, R4; /* bump PTE address */
/*
 * set kernel SB for zero segment (instead of usual KZERO segment).
 * NB: the next line puts rubbish in R12:
 * MOVW $setR12-KZERO(SB), R12
 * so the adjustment is made with separate instructions instead,
 * leaving R12 = setR12 - KZERO + PHYSDRAM.
 */
#define SETZSB \
MOVW $setR12(SB), R12; /* load kernel's SB */ \
SUB $KZERO, R12; /* take off the KZERO base */ \
ADD $PHYSDRAM, R12
/*
 * 5a's RFE is not the v6/7 arch. instruction (0xf8900a00), which loads
 * CPSR from the word after the PC at (R13).  it is instead the pre-v6
 * simulation `MOVM.IA.S.W (R13), [R15]' (0xe8fd8000 since MOVM is LDM
 * in this case), and that loads CPSR from SPSR (due to `.S'), not from
 * memory.
 *
 * the v6/7 encodings are therefore spelled out here.  r is a register
 * number, or'd in at bit 16; the .W variants also set bit 21.
 */
#define RFEV7(r) WORD $(0xf8900a00 | ((r) << 16))
#define RFEV7W(r) WORD $(0xf8900a00 | (1 << 21) | ((r) << 16)) /* RFE.W */
#define RFEV7DB(r) WORD $(0xf9100a00 | ((r) << 16)) /* RFE.DB */
#define RFEV7DBW(r) WORD $(0xf9100a00 | (1 << 21) | ((r) << 16)) /* RFE.DB.W */
/*
 * both of these expand to nothing, so uses of them generate no code.
 * NOTE(review): presumably disabled PSR sanity checks -- confirm.
 */
#define CKPSR(psr, tmp, bad)
#define CKCPSR(psrtmp, tmp, bad)
/*
 * return with cpu id in r and condition codes set from "r == 0".
 * reads CpIDmpid from coprocessor CpSC into r, then keeps only the
 * bits selected by MAXMACH-1.
 * NOTE(review): the mask presumably relies on MAXMACH being a power
 * of two -- confirm where MAXMACH is defined.
 */
#define CPUID(r) \
MFCP CpSC, 0, r, C(CpID), C(CpIDidct), CpIDmpid; \
AND.S $(MAXMACH-1), r /* mask out non-cpu-id bits */
|