historical/m0-applesillicon.git/xnu-qemu-arm64-5.1.0/roms/SLOF/board-js2x/llfw/startup.S
2024-01-16 11:20:27 -06:00

708 lines
16 KiB
ArmAsm

/******************************************************************************
* Copyright (c) 2004, 2008 IBM Corporation
* All rights reserved.
* This program and the accompanying materials
* are made available under the terms of the BSD License
* which accompanies this distribution, and is available at
* http://www.opensource.org/licenses/bsd-license.php
*
* Contributors:
* IBM Corporation - initial implementation
*****************************************************************************/
# SLOF for JS20/JS21 -- ROM boot code.
# Initial entry point, copy code from flash to cache, memory setup.
# Also sets up serial console and optimizes some settings.
#include "termctrl.h"
#include <product.h>
#include <xvect.h>
#include <cpu.h>
#include <macros.h>
#include <southbridge.h>
.text
/*
 * __start -- first instruction of the image.
 *
 * The `bl postHeader` serves two purposes: it skips the four header words
 * that follow it, and it leaves their address in LR.  postHeader clears the
 * low 15 bits of that address (rounding it down to a 32 KiB boundary),
 * stores the result in SPRG1 as the rom/romfs base and continues at _start.
 * _start issues further `bl`s that overwrite LR, so the final `bl _start`
 * acts as a plain jump.
 *
 * Clobbers: r3, r4, LR, SPRG1.
 */
.globl __start
__start:
/* put rombase in sprg1 ***********************/
bl postHeader
/* Header words.  size and crc are assembled as 0 here -- presumably they
   are filled in by a build tool afterwards (TODO confirm). */
.long 0xDEADBEE0
.long 0x0 /* size */
.long 0x0 /* crc */
/* offset of the RELEASE string (relTag) from __start */
.long relTag - __start
postHeader:
mflr r3
/* r4 = ~0x7fff: mask that drops the low 15 address bits */
li r4, 0x7fff
not r4, r4
and r3, r3, r4
mtsprg 1, r3 /* romfs base */
bl _start
/*
 * __startSlave -- entry point pinned at section offset 0x50 (written as
 * 0x150 - 0x100).  Programs the HID registers, makes data accesses
 * cache-inhibited and joins the common slave path, which reads the cpu
 * number itself.  The commented-out branch would instead enter at
 * slaveWithNumber, which expects the cpu number already in r28.
 */
.org 0x150 - 0x100
__startSlave:
bl setup_cpu
bl set_ci_bit
# b slaveWithNumber
b slave
/*
 * __startMaster -- entry point pinned at section offset 0x80 (written as
 * 0x180 - 0x100).  Unlike __start it forces the rom base in SPRG1 to 0,
 * which the rest of this file treats as "running from RAM", then does the
 * same cpu setup as _start and enters master directly, skipping the
 * semaphore read.
 */
.org 0x180 - 0x100
__startMaster:
li 3,0
mtsprg 1, r3 /* romfs base */
bl setup_cpu
bl set_ci_bit
b master
/* FIXME: Also need 0280, 0380, 0f20, etc. */
/* NOTE(review): 0x0280 and 0x0380 are already in the list below; 0x0f20 is
   not.  The FIXME above looks partly stale. */
/*
 * Exception vector stubs.  For each offset i in the list one stub is placed
 * at section offset i.  A stub
 *   - parks r0 in SPRG0, CTR in SPRG2 and LR in SPRG3,
 *   - loads a 64-bit handler address from absolute address i + 0x160,
 *   - puts i + 0x100 into r0 as the vector number,
 *   - branches to the handler through CTR.
 * The handler pointer itself is assembled at section offset i + 0x60 and
 * initially points at intHandler2C.  The 0x100 difference between the
 * section offset and the absolute load address matches the "- 0x100" used
 * by the .org directives in this file, i.e. the section is presumably
 * placed at address 0x100 (TODO confirm against the linker script).
 */
.irp i, 0x0100,0x0180,0x0200,0x0280,0x0300,0x0380,0x0400,0x0500,0x0600,0x0700, \
0x0800,0x0900,0x0a00,0x0b00,0x0c00,0x0d00,0x0e00,0x0f00, \
0x1000,0x1100,0x1200,0x1300,0x1400,0x1500,0x1600,0x1700, \
0x1800,0x1900,0x1a00,0x1b00,0x1c00,0x1d00,0x1e00,0x1f00, \
0x2000,0x2100,0x2200,0x2300,0x2400,0x2500,0x2600,0x2700, \
0x2800,0x2900,0x2a00,0x2b00,0x2c00,0x2d00,0x2e00
. = \i
/* enable this if you get exceptions before the console works */
/* this will allow using the hardware debugger to see where */
/* it traps, and with what register values etc. */
// b $
mtsprg 0, r0
mfctr r0
mtsprg 2,r0
mflr r0
// 10
mtsprg 3,r0
/* RA = 0 in the next load means "no base register", not the contents of r0 */
ld r0, (\i + 0x160)(0)
mtctr r0
li r0, \i + 0x100
// 20
bctr
/* per-vector handler pointer, rewritten by set_exception / set_exception_asm */
. = \i + 0x60
.quad intHandler2C
.endr
/* pointer slots read elsewhere, e.g. proceedInterrupt loads XVECT_M_HANDLER;
   both start out as 0 */
. = XVECT_M_HANDLER - 0x100
.quad 0x00
. = XVECT_S_HANDLER - 0x100
.quad 0
/*
 * _start / master -- common boot path reached from __start.
 *
 * _start programs the cpu, makes data accesses cache-inhibited and reads the
 * word at 0xf8000060: when bit 0 of that word is clear this cpu continues
 * as a slave, otherwise it falls through into master.
 *
 * master sets up the serial console, prints CR/LF and 'S', and then
 *   - rom base (SPRG1) != 0: continues at copy_to_cache;
 *   - rom base == 0 (started from RAM): waits, copies 4 MB from address 0
 *     to 0x02000000, records 0x02000000 as the new rom base in SPRG1 and
 *     jumps to the copy_to_cache code inside that copy.
 */
.org 0x4000 - 0x100
_start:
# optimize HID register settings
bl setup_cpu
bl set_ci_bit
# read semaphore, run as slave if not the first to do so
li 3,0 ; oris 3,3,0xf800 ; lwz 3,0x60(3) ; andi. 3,3,1 ; beq slave
master:
# setup flash, serial
bl setup_sio
# early greet
li r3, 10
bl putc
li 3,13 ; bl putc ; li 3,10 ; bl putc ; li 3,'S' ; bl putc
#do we run from ram ?
mfsprg r3, 1 /* rombase */
cmpdi r3,0 /* rombase is 0 when running from RAM */
bne copy_to_cache
# wait a bit, start scripts are slow... need to get all cores running!
/* busy-wait: CTR = 0x40000000, then count it down to zero */
lis 3,0x4000 ; mtctr 3 ; bdnz $
# copy 4MB from 0 to temp memory
/* CTR = 0x80000 doublewords; r3 = source - 8, r4 = 0x02000000 - 8 because
   ldu/stdu pre-increment by 8 */
lis 4,0x8 ; mtctr 4 ; lis 4,0x200 ; li 3,0 ; addi 4,4,-8 ; addi 3,3,-8
0: ldu 5,8(3) ; stdu 5,8(4) ; bdnz 0b
/* from here on the rom base is the RAM copy at 0x02000000 */
lis 4,0x200
mtsprg 1, r4
/* FLUSH_CACHE is defined outside this file; it is invoked with
   r3 = 0x00200000 + 0x200 - 8 and r4 = 0x10000.
   NOTE(review): r3 is based on 0x00200000 (lis 3,0x20) while the copy above
   went to 0x02000000 (lis 4,0x200) -- confirm that this is the range that
   is meant to be flushed. */
lis 4,0x1
lis 3,0x20 ; addi 3,3,0x200-8 ;
FLUSH_CACHE(r3, r4)
/* continue at copy_to_cache inside the RAM copy: 0x02000000 + low 16 bits
   of the copy_to_cache address */
lis 4,0x200
addi 4,4,copy_to_cache@l
mtctr 4
bctr
# make all data accesses cache-inhibited
/* Thin wrapper around the SETCI macro (defined outside this file), which is
   handed r0 -- presumably as a scratch register (TODO confirm).  Callers in
   this file do not rely on r0 across the call. */
set_ci_bit:
SETCI(r0)
blr
# make all data accesses cacheable
/* Thin wrapper around the CLRCI macro (defined outside this file), which is
   handed r0 -- presumably as a scratch register (TODO confirm).  Callers in
   this file do not rely on r0 across the call. */
clr_ci_bit:
CLRCI(r0)
blr
/*
 * putc -- write one character to the serial console(s).
 *
 * In:       r3  = character
 *           r13 = base address of the primary UART (saved by setup_sio)
 * Clobbers: r0, r4, r5, CR0
 *
 * The character always goes to the UART at r13.  If the Super I/O ID
 * register (index 0x20) reads 0xf2, i.e. the chip is a PC87427 (JS21), the
 * character is sent to the second UART at 0xf40002f8 as well.
 */
putc:
	/* wait until bit 0x20 of the status byte at offset 5 is set, then send */
.Lputc_wait_primary:
	lbz	r0,5(r13)
	andi.	r0,r0,0x20
	beq	.Lputc_wait_primary
	stb	r3,0(r13)
	eieio

	/* r4 = 0xf4000000; select ID register 0x20 via index port 0x2e and
	   read it back through data port 0x2f */
	li	r4,0
	oris	r4,r4,0xf400
	li	r5,0x20
	stb	r5,0x2e(r4)
	lbz	r5,0x2f(r4)
	cmpdi	r5,0xf2
	bnelr				/* not a PC87427: nothing more to do */

	/* same polling and send sequence on the UART at 0x2f8 */
	addi	r4,r4,0x2f8
.Lputc_wait_secondary:
	lbz	r0,5(r4)
	andi.	r0,r0,0x20
	beq	.Lputc_wait_secondary
	stb	r3,0(r4)
	eieio
	blr
# transfer from running from flash to running from cache
/*
 * return_cacheable -- "return" to the caller, but at the low-memory alias of
 *                     the return address: LR with its upper 44 bits cleared
 *                     (only the low 20 address bits are kept).
 * jump_cacheable   -- same, with the target address supplied in r3 (used
 *                     unmodified).
 *
 * Both load SRR0 with the target and SRR1 with the current MSR plus bit
 * 0x1000, set bit 0x0020_0000 in the upper word of HID1 and leave through
 * rfid.
 * Clobbers: r3, SRR0, SRR1, HID1.
 */
return_cacheable:
# find and set address to start running from cache, set msr value
mflr 3 ; rldicl 3,3,0,44
jump_cacheable:
mtsrr0 3 ;
mfmsr 3 ; ori 3,3,0x1000 ; mtsrr1 3 # enable MCE, as well
# set cacheable insn fetches, jump to cache
/* swap the word halves, OR 0x0020 into the upper halfword of what is now
   the low word, swap back */
mfspr 3,HID1 ; rldicl 3,3,32,0 ; oris 3,3,0x0020 ; rldicl 3,3,32,0
/* HID1 is written twice in a row -- presumably a 970 requirement (TODO
   confirm against the processor manual).  The trailing "b ." is never
   reached after rfid. */
sync ; mtspr HID1,3 ; mtspr HID1,3 ; rfid ; b .
/*
 * copy_to_cache -- continue booting with code and data held in the cache.
 *
 * Steps, with a progress character printed through putc along the way:
 *   1. zero cache lines starting at address 0 (dcbz) and invalidate the
 *      matching instruction cache lines;
 *   2. copy 1 kB at image offset 0x4000 to address 0x4000 and switch to
 *      executing from there (return_cacheable)                       'L'
 *   3. copy 128 kB at image offset 0x200 to address 0x200            'O'
 *   4. on a U3 bridge program the memory controller, unless the rom
 *      base compares equal to 0x00200000                             'F'
 *   5. set up nvram logging (not when the rom base is 0, not on js20)
 *   6. make data cacheable, print the banner and call __startC with the
 *      rom base in r10.
 * r31 holds the 1 MB-aligned base address of the running image from step 2
 * onwards.
 */
copy_to_cache:
# zero the whole cache
# also, invalidate the insn cache, to clear parity errors
# 128kB @ 0MB (boot code and vectors from 0x0 up to 0x20000)
/* 0x400 lines of 0x80 bytes = 0x20000 bytes, starting at 0 */
li 4,0x400 ; mtctr 4 ; li 5,0x0 ; bl clr_ci_bit
0: dcbz 0,5 ; sync ; icbi 0,5 ; sync ; isync ; addi 5,5,0x80 ; bdnz 0b
# 0x2000 to 0x100000/0x80000 (smaller on 970/970FX)
/* line count is 0x1C00 when the PVR version field is >= 0x44, else 0xC00.
   NOTE(review): the loop starts at 0x2000 (li 5,0x2000), so it ends at
   0xE2000 / 0x62000.  The end addresses named above (0x100000 / 0x80000)
   and the 0x20000 end of the previous loop would match a start of 0x20000
   instead -- confirm which start address is intended. */
li 4,0x1C00 ; mfpvr 0 ; srdi 0,0,16 ; cmpdi 0,0x0044 ; bge 0f ; li 4,0xC00
0:
mtctr 4 ; li 5,0x2000
0: dcbz 0,5 ; sync ; isync ; addi 5,5,0x80 ; bdnz 0b ; bl set_ci_bit
# find base address
/* bcl 20,31,$+4 puts the address of the next instruction into LR; clearing
   its low 20 bits gives the 1 MB-aligned base of the running image */
bcl 20,31,$+4 ; mflr 31 ; rldicr 31,31,0,43
# copy 1kB from 0x4000
/* 0x80 doublewords; r3 = image base + 0x4000 - 8 (source), r5 = 0x4000 - 8
   (destination).  Each doubleword is loaded with data accesses
   cache-inhibited and stored with them cacheable. */
li 4,0x80 ; mtctr 4 ;
li 5,0x3ff8
addi 3,31,0x3ff8
0: ldu 4,8(3) ; bl clr_ci_bit ; stdu 4,8(5) ; bl set_ci_bit ; bdnz 0b
# now start executing from cache -- insn cache is huge speed boost
/* execution resumes at the next instruction, but at its low-memory address */
bl return_cacheable
li 3,'L' ; bl putc
# copy 128kB of flash to cache
/* 0x800 chunks of 64 bytes, from image base + 0x200 to address 0x200, moved
   through r16..r23; icbi once per chunk on the destination address */
li 4,0x800 ; mtctr 4 ; li 5,0x200-64 ; addi 3,31,0x200-64 ;
0: ldu 16,64(3) ; ld 17,8(3) ; ld 18,16(3) ; ld 19,24(3)
ld 20,32(3) ; ld 21,40(3) ; ld 22,48(3) ; ld 23,56(3)
bl clr_ci_bit
stdu 16,64(5) ; std 17,8(5) ; std 18,16(5) ; std 19,24(5)
std 20,32(5) ; std 21,40(5) ; std 22,48(5) ; std 23,56(5)
icbi 0,5 ; bl set_ci_bit ; bdnz 0b ; isync
li 3,'O' ; bl putc
/* skip the memory controller setup when the rom base equals 0x00200000.
   NOTE(review): the RAM-boot path in master stores 0x02000000 (lis 4,0x200)
   in SPRG1, not 0x00200000 (lis 4,0x20) -- confirm which value this
   comparison is meant to use. */
lis 4,0x20
mfsprg r3,1
cmpd r3,r4
beq 1f
// at 0xf8000000 we decide if it is u3 or u4
/* (word at 0xf8000000) >> 4 == 3 selects the U3 setup; nothing is done for
   any other value */
li 4,0 ; oris 4,4,0xf800 ; lwz 3,0(4) ; srdi 3,3,4 ; cmpdi 3,3 ; bne 0f
bl setup_mem_u3
bl setup_mem_size
b 1f
0:
1:
li 3,'F' ; bl putc
# setup nvram logging only when not running from RAM
mfsprg r3, 1 /* rombase */
cmpdi r3, 0 /* rombase is 0 when running from RAM */
beq 0f
// at 0xf8000000 we decide if it is u3 or u4
li r4, 0
oris r4, r4, 0xf800
lwz r3, 0(r4)
srdi r3, r3, 4
cmpdi r3, 3 /* 3 means js20; no nvram logging on js20 */
beq 0f
bl io_log_init
0:
#bl print_mem
# data is cacheable by default from now on
bl clr_ci_bit
/* give live sign *****************************/
/* "bl 0f" jumps over the banner text and leaves its address in LR; the
   text is NUL-terminated and handed to io_print in r3 */
bl 0f
.ascii TERM_CTRL_RESET
.ascii TERM_CTRL_CRSOFF
.ascii " **********************************************************************"
.ascii "\r\n"
.ascii TERM_CTRL_BRIGHT
.ascii PRODUCT_NAME
.ascii " Starting\r\n"
.ascii TERM_CTRL_RESET
.ascii " Build Date = ", __DATE__, " ", __TIME__
.ascii "\r\n"
.ascii " FW Version = " , RELEASE
.ascii "\r\n\0"
.align 2
0: mflr r3
bl io_print
# go!
/* call __startC through CTR using the low 16 bits of its address, with the
   rom base passed in r10 */
li r3,__startC@l
mtctr r3
mfsprg r10, 1
bctrl
/* NUL-terminated RELEASE string; its offset from __start is stored in the
   image header.  No code follows the bctrl above, so __startC is
   presumably not expected to return. */
relTag:
.ascii RELEASE
.ascii "\0"
.align 2
/*
 * slave / slaveWithNumber -- park a secondary cpu.
 *
 * slave reads the cpu number from the word at 0xf8000050 into r28;
 * slaveWithNumber expects it in r28 already.  The parking address is
 * r3 = (cpu number << 24) | 0x30000000.  Cache lines starting at that
 * address are zeroed and invalidated, a "b $" instruction (0x48000000) is
 * stored at the address, and the cpu jumps there through jump_cacheable.
 * Data accesses are left cacheable (clr_ci_bit).
 */
slave:
# get cpu number
li 3,0 ; oris 3,3,0xf800 ; lwz 28,0x50(3)
slaveWithNumber:
# create our slave loop address
sldi 3,28,24 ; oris 3,3,0x3000
# invalidate the insn cache, to clear parity errors
# clear the L2 cache as well, to get ECC right
/* 0x2000 lines of 0x80 bytes when the PVR version field is >= 0x44,
   otherwise 0x1000 lines */
li 4,0x2000 ; mfpvr 0 ; srdi 0,0,16 ; cmpdi 0,0x0044 ; bge 0f ; li 4,0x1000
0: mtctr 4 ; mr 5,3 ; bl clr_ci_bit
0: dcbz 0,5 ; sync ; icbi 0,5 ; sync ; isync ; addi 5,5,0x80 ; bdnz 0b
# write a "b $" insn in there
lis 4,0x4800 ; stw 4,0(3)
/* disabled debug aid: would print 'O' + cpu number on the uart at 0x2f8 */
/*
mr 5,3
# jump there
bl set_ci_bit
li 13,0 ; oris 13,13,0xf400
# device address
addi 13,13,0x2f8
li 3,'O' ; add 3,3,28 ; bl putc
bl clr_ci_bit
mr 3,5
*/
b jump_cacheable
/*
 * setup_sio -- allow the flash chip to be accessed faster and initialize
 *              the 16550-compatible uart(s) of the Super I/O.
 *
 * Out:      r13 = base address of the uart at 0x3f8 (0xf40003f8), which is
 *                 what putc uses
 * Clobbers: r3, r4, CR0
 *
 * Super I/O configuration registers are reached through index port 0x2e and
 * data port 0x2f, relative to the i/o base 0xf4000000.  The uart at 0x3f8
 * is always configured; the one at 0x2f8 only when the ID register (index
 * 0x20) reads 0xf2 (PC87427).  Both uarts get the same settings: interrupts
 * and fifo off, divisor 115200/19200, 8-N-1, RTS and DTR set.
 *
 * Changes against the previous version: the i/o base address was loaded
 * twice in a row and the final instruction carried a local label that
 * nothing referenced; both are removed.  The register writes are unchanged.
 */
setup_sio:
	/* i/o base address */
	li 3,0 ; oris 3,3,0xf400

	/* put x-bus in turbo mode */
	li 4,0xf1 ; stb 4,0x400(3) ; eieio

	/* select sio serial1 */
	li 4,7 ; stb 4,0x2e(3) ; eieio ; li 4,3 ; stb 4,0x2f(3) ; eieio
	/* set base address to 3f8 */
	li 4,0x60 ; stb 4,0x2e(3) ; eieio ; li 4,3 ; stb 4,0x2f(3) ; eieio
	/* enable device */
	li 4,0x30 ; stb 4,0x2e(3) ; eieio ; li 4,1 ; stb 4,0x2f(3) ; eieio

	/* read ID register: only if it is a PC87427, enable serial2 */
	li 4,0x20 ; stb 4,0x2e(3) ; eieio ; lbz 4,0x2f(3) ; cmpdi 4,0xf2 ; bne 0f

	/* select sio serial2 */
	li 4,7 ; stb 4,0x2e(3) ; eieio ; li 4,2 ; stb 4,0x2f(3) ; eieio
	/* set base address to 2f8 */
	li 4,0x60 ; stb 4,0x2e(3) ; eieio ; li 4,2 ; stb 4,0x2f(3) ; eieio
	/* enable device */
	li 4,0x30 ; stb 4,0x2e(3) ; eieio ; li 4,1 ; stb 4,0x2f(3) ; eieio

	/* uart @0x2f8 */
	addi 3,3,0x2f8
	/* disable interrupts, fifo off */
	li 4,0 ; stb 4,1(3) ; eieio ; stb 4,2(3) ; eieio
	/* set serial speed: 0x80 into offset 3 exposes the divisor at offsets 0/1 */
	li 4,0x80 ; stb 4,3(3) ; eieio
	li 4,115200/19200 ; stb 4,0(3) ; eieio ; li 4,0 ; stb 4,1(3) ; eieio
	/* set 8-N-1, set RTS and DTR */
	li 4,3 ; stb 4,3(3) ; eieio ; stb 4,4(3) ; eieio
	eieio
	/* back to the i/o base so that the common path below can add 0x3f8 */
	addi 3,3,-0x2f8

	/* uart @0x3f8 */
0:	addi 3,3,0x3f8
	/* disable interrupts, fifo off */
	li 4,0 ; stb 4,1(3) ; eieio ; stb 4,2(3) ; eieio
	/* set serial speed */
	li 4,0x80 ; stb 4,3(3) ; eieio
	li 4,115200/19200 ; stb 4,0(3) ; eieio ; li 4,0 ; stb 4,1(3) ; eieio
	/* set 8-N-1, set RTS and DTR */
	li 4,3 ; stb 4,3(3) ; eieio ; stb 4,4(3) ; eieio
	eieio

	/* save UART base for putc routine */
	mr 13,3
	blr
# set the HID registers of the 970 for optimally executing from flash
/*
 * setup_cpu -- program HID0, HID1, HID4 and HIOR.
 *
 * In:       SPRG1 = rom base (0 selects the extra HID1 bit for RAM boot)
 * Clobbers: r0, r1, r3, r4, CTR, CR0
 * Note that r1 is used as a plain scratch register here.
 */
setup_cpu:
/* clear all the HV cruft */
li r0, 0
sync
mtspr HID4, r0
isync
/* enable dpm, disable attn insn, enable external mce
* first, try external time base; if clock doesn't run, switch to
* internal */
li r0, 1 /* do the setup for external timebase */
rldicl r0, r0, 44, 0 /* bit 19 has to be set */
oris r0, r0, 0x8000 /* Enable external machine check */
/* interrupts (preferred state */
/* equals `1'). */
sync
mtspr HID0, r0
isync
mftb r3 /* read the timebase */
li r1, 0x4000 /* wait long enough for the external */
mtctr r1 /* timebase (14MHz) to tick a bit */
bdnz $ /* 0x4000 seems to be enough (for now) */
mftb r4 /* read the timebase a second time */
cmpld r3, r4 /* see if it changed */
bne 0f
/* timebase did not change, do the setup for internal */
/* rotate bit 19 into the top position, clear it, rotate back (19 + 45 = 64) */
rldicl r0, r0, 19, 1
rldicl r0, r0, 45, 0
sync
mtspr HID0, r0
isync
0:
/* enable insn prefetch, speculative table walks */
/* HID1 is rotated left by 20 so that the bits of interest fall into the
   16-bit ori range, and rotated by a further 44 (a full turn) afterwards.
   Net effect: 0x1002 sets bits 1 << 56 and 1 << 45, 0x200 sets 1 << 53. */
mfspr r0, HID1
rldicl r0, r0, 20, 0
ori r0, r0, 0x1002
mfsprg r3, 1 /* read rombase */
cmpdi r3, 0 /* check if running from ram */
bne 0f
/* running from ram */
/* Enable instruction fetch cacheability control */
ori r0, r0, 0x200
0:
rldicl r0, r0, 44, 0
sync
mtspr HID1, r0
isync
/* enable cache parity */
/* oris followed by xoris with the same mask clears bits 0xfff00000 */
mfspr r0, HID4
oris r0, r0, 0xfff0
xoris r0, r0, 0xfff0
sync
mtspr HID4, r0
isync
/* exception offset at 0 */
li r3, 0
mtspr HIOR, r3
blr
/*
 * proceedInterrupt -- abandon the C-level handling of the current exception
 * and pass it on to the handler whose address is stored at XVECT_M_HANDLER.
 *
 * The frame that intHandler2C recorded in exception_stack_frame is used to
 * restore r2..r31, SRR0, SRR1 and CR.  The handler is then entered through
 * CTR with r0 = vector number and r1 = interrupted stack pointer, i.e. with
 * the register state a vector stub hands to its handler.  SPRG0, SPRG2 and
 * SPRG3 are not touched here.
 *
 * Fix: r14 used to serve as the scratch register for the SRR0/SRR1/CR
 * reloads AFTER r2..r31 had been restored, so the handler saw the saved CR
 * image in r14 instead of the interrupted value.  r0 is used now; it is
 * reloaded with the vector number as the last step anyway.
 *
 * NOTE(review): unlike the exit path of intHandler2C, XER (frame offset
 * 0x150) is not restored here -- confirm whether the chained handler needs
 * the interrupted XER.
 *
 * In: r2 = TOC pointer (for the exception_stack_frame GOT access).
 * Does not return to its caller.
 */
C_ENTRY(proceedInterrupt)
	ld	r3,exception_stack_frame@got(r2)
	ld	r1,0(r3)		/* r1 = frame built by intHandler2C */

	.irp i, 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \
		17, 18, 19, 20, 21, 22, 23, 24, 25, 26, \
		27, 28, 29, 30, 31
	ld	r\i, 0x30+\i*8 (r1)
	.endr

	/* r0 is the only scratch register from here on */
	ld	r0,0x138(r1)
	mtsrr0	r0
	ld	r0,0x140(r1)
	mtsrr1	r0
	ld	r0,0x148(r1)
	mtcr	r0

	/* RA = 0 in this load means "no base register" */
	ld	r0,XVECT_M_HANDLER(0)
	mtctr	r0

	ld	r0,0x30(r1)		/* restore vector number */
	ld	r1,0x38(r1)		/* interrupted stack pointer, must be last */
	bctr
/*
 * intHandler2C -- default target of every vector stub; saves the
 * interrupted context and calls the C function c_interrupt.
 *
 * On entry (set up by the vector stub):
 *   r0 = vector number, SPRG0 = interrupted r0, SPRG2 = interrupted CTR,
 *   SPRG3 = interrupted LR.
 *
 * A 0x160-byte frame is created directly below 0x40000; every entry uses
 * this same address.  Frame layout (offsets from the new r1):
 *   0x030          vector number
 *   0x038          interrupted r1
 *   0x040 .. 0x128 r2 .. r31   (rN at 0x30 + N * 8)
 *   0x138 SRR0     0x140 SRR1     0x148 CR     0x150 XER
 * The frame address is also stored through the exception_stack_frame GOT
 * entry, where proceedInterrupt picks it up.
 *
 * After c_interrupt returns everything is restored and the exception is
 * left with rfid.
 */
intHandler2C:
mtctr r1 # save old stack pointer
lis r1,0x4
stdu r1, -0x160(r1)
.irp i, 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, \
27, 28, 29, 30, 31
std r\i, 0x30+\i*8 (r1)
.endr
std r0,0x30(r1); # save vector number
/* r14 has been saved above and serves as scratch from here on */
mfctr r14
std r14,0x38(r1); # save old r1
mfsrr0 r14
std r14,0x138(r1);
mfsrr1 r14
std r14,0x140(r1);
mfcr r14
std r14,0x148(r1);
mfxer r14
std r14,0x150(r1);
bl toc_init
ld r3,exception_stack_frame@got(r2)
std r1,0(r3)
/* first argument of c_interrupt = vector number.
   NOTE(review): this relies on toc_init (defined elsewhere) leaving r0
   untouched -- confirm. */
mr r3,r0
bl .c_interrupt
ld r14,0x138(r1);
mtsrr0 r14
ld r14,0x140(r1);
mtsrr1 r14
ld r14,0x148(r1);
mtcr r14
ld r14,0x150(r1);
mtxer r14
.irp i, 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16, \
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, \
27, 28, 29, 30, 31
ld r\i, 0x30+\i*8 (r1)
.endr
ld r1,0x38(r1);
/* CTR, LR and r0 come back from the SPRGs the vector stub parked them in */
mfsprg r0,2
mtctr r0
mfsprg r0,3
mtlr r0
mfsprg r0,0
rfid
/* Set exception handler for given exception vector.
r3: exception vector offset
r4: exception handler

Two entry points, each exported with and without a leading dot:
  set_exception      first replaces r4 by the doubleword it points to
                     (presumably r4 is a function descriptor whose first
                     doubleword is the code address -- TODO confirm), then
                     falls through;
  set_exception_asm  takes the handler code address in r4 as is.
The address is stored at 0x60(r3).  The vector stubs in this file load
their handler from vector offset + 0x160 - 0x100, so r3 is presumably the
section-relative vector offset (verify against callers).
*/
.globl .set_exception
.set_exception:
.globl set_exception
set_exception:
ld r4,0x0(r4)
.globl .set_exception_asm
.set_exception_asm:
.globl set_exception_asm
set_exception_asm:
std r4, 0x60(r3) # fixme diff 1f - 0b
blr
/*
 * setup_mem_u3 -- write fixed timing and configuration values into the
 * memory controller registers at 0xf8002000 + offset.
 *
 * Clobbers: r3, r4, r5, CTR.  Leaf routine, LR is left untouched.
 */
setup_mem_u3:
/* r4 = 0xf8002000, register block base */
li 4,0x2000 ; oris 4,4,0xf800
# MemTimingParam -- CAS lat 2.5 / 4 (read-to-read / read-to-write)
lis 3,0x49e1 ; ori 3,3,0xa000 ; stw 3,0x50(4)
# MRSRegCntl -- CAS lat 2.5
li 3,0x6a ; stw 3,0xf0(4)
# MemBusConfig -- 128 bit bus
lis 3,0x8500 ; stw 3,0x190(4)
# CKDelAdj -- clock delay 75
lis 3,0x12c3 ; ori 3,3,0x30cc ; stw 3,0x520(4)
# IOModeCntl -- no termination on differential and 3-state drivers
lis 3,0x0350 ; stw 3,0x530(4)
/* 18 iterations, two stwu each: 36 registers spaced 0x10 apart starting at
   base + 0x600 (r5 starts at base + 0x5f0 and is pre-incremented) */
li 3,18 ; mtctr 3 ; addi 5,4,0x5f0
0: # DQSDelAdj -- read delay offset -10
lis 3,0x3d8f ; ori 3,3,0x6000 ; stwu 3,0x10(5)
# DQSDataDelAdj -- write delay offset -32, write data delay offset +15
lis 3,0x380e ; ori 3,3,0x003c ; stwu 3,0x10(5)
bdnz 0b
# MemProgCntl -- set all
lis 3,0xc000 ; stw 3,0xe0(4)
eieio
blr
# read dimm SPDs, program memory size and type
/*
 * setup_mem_size -- read three bytes from subaddress 3 of the i2c devices
 * 0xa0/0xa2 and 0xa4/0xa6 (one pair at a time) and program the registers at
 * 0xf80021c0 .. 0xf80022b0 from them.
 *
 * Register use:
 *   r14  saved LR             r15  0xf8000000
 *   r16  bytes read for the current pair
 *   r17  running total, advanced by add17173 (starts at 0)
 * Also clobbers r0, r3, r4, r5, CR0 and whatever i2c_read uses (r20..r24).
 *
 * For each pair: r16 is the result of the first device, or 0 when that read
 * reports an error.  If the second device returns a different value or
 * reports an error, r16 is replaced by 0x1e00.
 */
setup_mem_size:
mflr 14
li 15,0 ; oris 15,15,0xf800 ; li 17,0
/* first pair: devices 0xa0 and 0xa2 */
li 3,0xa0 ; li 4,3 ; li 5,3 ; bl i2c_read
mr 16,4 ; cmpdi 3,0 ; beq 0f ; li 16,0
0: li 3,0xa2 ; li 4,3 ; li 5,3 ; bl i2c_read
cmpd 16,4 ; bne 0f ; cmpdi 3,0 ; beq 1f
0: li 16,0x1e00
1: #li 3,0xd ; bl print_byte ; li 3,0xa ; bl print_byte
#mr 3,16 ; bl print_hex
#li 3,0x20 ; bl print_byte
/* value for 0x21c0, derived from r16; written to 0x21e0 as well when bit 1
   of r16 is set */
sldi 3,16,7 ; add 3,3,16 ; rlwinm 3,3,10,0,6 ; subis 3,3,0x3c00
stw 3,0x21c0(15) ; andi. 0,16,2 ; beq 0f ; stw 3,0x21e0(15)
0: #bl print_hex
/* r3 = 8 << (8-bit field derived from r16): the amount added to r17 */
sldi 3,16,8 ; add 3,3,16 ; rldicl 3,3,48,56 ; li 0,8 ; slw 3,0,3
# slw, not sld, so that empty/bad banks translate into size 0
/* the running total is stored before and after adding r3; r3 is added a
   second time when bit 1 of r16 is set */
stw 17,0x21d0(15) ; bl add17173 ; stw 17,0x21f0(15)
andi. 0,16,2 ; beq 0f ; bl add17173
0: #bl print_hex
/* second pair: devices 0xa4 and 0xa6, same procedure, registers at 0x2200 and up */
li 3,0xa4 ; li 4,3 ; li 5,3 ; bl i2c_read
mr 16,4 ; cmpdi 3,0 ; beq 0f ; li 16,0
0: li 3,0xa6 ; li 4,3 ; li 5,3 ; bl i2c_read
cmpd 16,4 ; bne 0f ; cmpdi 3,0 ; beq 1f
0: li 16,0x1e00
1: #li 3,0xd ; bl print_byte ; li 3,0xa ; bl print_byte
#mr 3,16 ; bl print_hex
#li 3,0x20 ; bl print_byte
sldi 3,16,7 ; add 3,3,16 ; rlwinm 3,3,10,0,6 ; subis 3,3,0x3c00
stw 3,0x2200(15) ; andi. 0,16,2 ; beq 0f ; stw 3,0x2220(15)
0: #bl print_hex
sldi 3,16,8 ; add 3,3,16 ; rldicl 3,3,48,56 ; li 0,8 ; slw 3,0,3
stw 17,0x2210(15) ; bl add17173 ; stw 17,0x2230(15)
andi. 0,16,2 ; beq 0f ; bl add17173
0: #bl print_hex
#mr 3,17 ; bl print_hex
/* the final total goes into the four remaining registers */
stw 17,0x2250(15) ; stw 17,0x2270(15)
stw 17,0x2290(15) ; stw 17,0x22b0(15)
mtlr 14
blr
/*
 * print_hex -- print the low 32 bits of GPR3 as eight lowercase hex digits,
 *              most significant digit first, through putc.
 *
 * In:       r3 = value
 * Clobbers: r3, r18 (saved LR), r19 (remaining digits), CTR, CR0, and
 *           whatever putc clobbers
 */
print_hex:
	mflr	r18
	mr	r19,r3
	li	r3,8
	mtctr	r3			/* eight digits */
.Lprint_hex_digit:
	rlwinm	r3,r19,4,28,31		/* r3 = top nibble of the low word */
	sldi	r19,r19,4		/* move the next nibble into place */
	cmpdi	r3,0xa
	blt	.Lprint_hex_emit
	addi	r3,r3,0x27		/* 0x27 + 0x30 + 0xa = 'a' */
.Lprint_hex_emit:
	addi	r3,r3,0x30		/* '0' */
	bl	putc
	bdnz	.Lprint_hex_digit
	mtlr	r18
	blr
/*
 * i2c stuff uses GPR20..GPR24
 *
 * i2c_stop -- terminate any i2c transaction, at any point during that
 *             transaction, and return to the caller of i2c_read.
 *
 * In:  r20 = i2c register base, r21 = error code, r22 = result,
 *      r24 = return address saved by i2c_read
 * Out: r3 = r21, r4 = r22
 */
i2c_stop:
	/* keep writing the value at 0x30 back to itself until bit 0x4 shows up in it */
.Li2c_stop_wait:
	lwz	r3,0x30(r20)
	stw	r3,0x30(r20)
	andi.	r3,r3,4
	beq	.Li2c_stop_wait

	mr	r3,r21
	mr	r4,r22
	mtlr	r24
	eieio
	blr
# do a combined-mode read
# in: GPR3 = addr, GPR4 = subaddr, GPR5 = len
# out: GPR3 = error, GPR4 = result (right-aligned, msb)
/*
 * Register use:
 *   r20  i2c register base (0xf8001000)
 *   r21  addr on entry, then the error code (1 until all bytes are in, 0 on
 *        success)
 *   r22  subaddr on entry, then the result accumulator (one byte shifted in
 *        per received byte)
 *   r23  number of bytes still to receive
 *   r24  saved LR
 * Every exit goes through i2c_stop, which moves r21/r22 into r3/r4 and
 * returns to the caller.
 */
i2c_read:
mflr 24
li 20,0x1000 ; oris 20,20,0xf800 # uni-n i2c base
mr 21,3 ; mr 22,4 ; mr 23,5 # save params
li 4,0xc ; stw 4,0(20) # set mode (combined)
ori 4,21,1 ; stw 4,0x50(20) # set addr, read
stw 22,0x60(20) # set subaddr
li 4,2 ; stw 4,0x10(20) ; eieio # start address phase
li 21,1 # error
li 22,0 # result accumulator
0: lwz 3,0x30(20) ; andi. 3,3,2 ; beq 0b # wait until sent
lwz 3,0x20(20) ; andi. 3,3,2 ; beq i2c_stop # check result
/* r4 = 1 while more than one byte is outstanding, 0 before the last byte */
li 4,1 ; cmpdi 23,1 ; bne 0f ; li 4,0
0: stw 4,0x10(20) # AAK for next byte (or not)
li 4,2 ; stw 4,0x30(20) ; eieio # ack address phase
i2c_read_loop:
lwz 3,0x30(20) ; andi. 3,3,1 ; beq 1f # if byte recv'd:
subi 23,23,1 ; sldi 22,22,8 # shift byte accum
lwz 3,0x70(20) ; rlwimi 22,3,0,24,31 # get byte
cmpdi 23,0 ; bne 0f ; li 21,0 ; b i2c_stop # all done
0: li 4,1 ; cmpdi 23,1 ; bne 0f ; li 4,0
0: stw 4,0x10(20) # AAK for next byte (or not)
li 4,1 ; stw 4,0x30(20) ; eieio # ack data phase
/* keep polling until bit 0x4 of the status word at 0x30 is set; that ends
   the transfer with the error code still 1 */
1: lwz 3,0x30(20) ; andi. 3,3,4 ; beq i2c_read_loop
li 4,0 ; stw 4,0x10(20) ; eieio ; b i2c_stop # stop bit received
/*
 * add17173 -- add GPR3 into GPR17; if passing 2GB (0x10000000), add another
 *             2GB.
 *
 * In:       r3 = amount to add, r17 = running total
 * Out:      r17 = updated total
 * Clobbers: r0 (= 0x10000000), CR0
 *
 * The extra 0x10000000 is added only when the total was at or below the
 * limit before the addition and is above it afterwards (unsigned compares).
 */
add17173:
	lis	r0,0x1000		/* r0 = 0x10000000, the limit */
	cmpld	r17,r0			/* compare the OLD total with the limit */
	add	r17,r17,r3
	bgtlr				/* already above the limit: plain add */
	cmpld	r17,r0			/* compare the NEW total with the limit */
	blelr				/* still at or below the limit: done */
	add	r17,r17,r0		/* limit crossed by this addition */
	blr
/*
 * io_log_init -- load SB_NVRAM_adr into r3 with the LOAD64 macro and
 * tail-branch to checkinitLog (defined elsewhere); LR is not modified here,
 * so checkinitLog presumably returns straight to this routine's caller.
 */
io_log_init:
LOAD64(r3, SB_NVRAM_adr)
b checkinitLog