;****************************************************************************************
;* setup.s - setting up a few things :)							*
;*											*
;* gets loaded from boot.s to 0500h:0000h						*
;*											*
;* - enables A20									*
;* - loads kernel into memory (1000h:0000h)						*
;* - moves kernel to 100000h (1MB)							*
;* - sets up a GDT									*
;* - sets up an IDT									*
;* - enters PMode									*
;* - checks for valid PE header								*
;* - checks PE-section headers for executable (code) section				*
;* - copy single kernel-sections to destination address					*
;* - integrate kernel-page-directory and required kernel-page-tables			*
;* - jumps to kernel entry point							*
;*											*
;* The PE routines and the jump to kernel may not be the best (may be they're kinda	*
;* worst ;) but it works (for now)							*
;*											*
;* ToDo:										*
;* - %											*
;*											*
;*											*
;* IDT handler implemented (int 21h):							*
;*											*
;* | ah  | function									*
;* +-----+--------------------------------------------------------------		*
;* | 01h | very (!!) basic output to screen (with scrolling)				*
;* |     |   DS:ESI = string								*
;*											*
;*											*
;* written by Bastian Gloeckle (MrSaint), programmer of nucleOS				*
;* some PE stuff added by Michael Gerh"auser (saberrider)				*
;* mem_copy added 									*
;* added kernel page directory & page tables						*
;*											*
;* Copyright (C) 2004 by nucleOS group							*
;*											*
;* This program is free software; you can redistribute it and/or modify it under the	*
;* terms of the GNU General Public License as published by the Free Software Foundation	*
;* (version 2, June 1991)								*
;*											*
;* This program is distributed in the hope that it will be useful, but WITHOUT ANY	*
;* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A	*
;* PARTICULAR PURPOSE. See the GNU General Public License for more details.		*
;*											*
;* You should have received a copy of the GNU General Public License along with this	*
;* program; if not, write to: 								*
;* Free Software Foundation, Inc.							*
;* 59 Temple Place, Suite 330								*
;* Boston, MA 02111-1307 USA								*
;*											*
;*											*
;* You can contact us by electronic mail: 	admin@saint-soft.de			*
;* 						saberrider@users.sourceforge.net	*
;****************************************************************************************


;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; define some constants
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

KERNEL_LENGTH	equ 49			; number of 512-byte sectors load_kernel16 reads for the kernel image


; All labels below are assembled as offsets starting at 0x0500.
; NOTE(review): the file header says setup is loaded to 0500h:0000h, but the
; protected-mode part addresses these labels through base-0 segments, which only
; works if the code really sits at linear 0x0500 - confirm against boot.s.
ORG 0x0500
BITS 16					; execution starts in real mode

; STILL IN REAL MODE

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; code entry
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; Real-mode entry point: makes DS equal to CS, remembers the boot drive and
; installs a small private stack before anything else runs.
start:	
	mov ax, cs			; data labels are addressed relative to the code segment
	mov ds, ax

	mov [bootdrive], dl		; save boot drive (DL set by bootloader)

	cli				; no interrupts while SS:SP is only half updated

	mov ax, 9000h			; stack segment 9000h
	mov ss, ax
	mov sp, 200h			; stack = 512 bytes long (SP starts at its top)

	sti

; Moved beyond RAM Detection...
;	mov si, setting_a20_up		; output message: "Setting up A20..."
;	call output_str16

;	call enable_a20_16		; enable a20 line


;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; Beginning of RAM Detection...
;
; We're using the often recommended way to detect installed RAM:
;
; first int 15h/ax=e820h   to map bios memory (0-1MB)
; then int 15h/ax=e801h    to detect extended memory (>1MB)
; if one or both of the above fail we use
; int 15h/ah=88h           to
; detect extended memory (1-max. 16MB)
; and maybe later on we use
; int 15h/ah=C7h           to detect extended memory above 16 MB
;
; For more information about this way of checking RAM, download
; Ralf Brown's Interrupt List and/or browse to Chris Geezer's
; OS Development page.
;
; http://www-2.cs.cmu.edu/afs/cs.cmu.edu/user/ralf/pub/WWW/
; http://my.execpc.com/~geezer/osd/index.htm
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::


; ........................................................
; Collects the BIOS system memory map (int 15h / eax=E820h).
;
; Layout of the result inside segment MAP_SEG:
;	offset TOT_RAM   (dword)  total RAM in kB (written by mem_e801 / mem_88)
;	offset COUNTER   (dword)  number of 20-byte map entries stored
;	offset COUNTER+4 ...      the map entries themselves
;
; Leaves ES = MAP_SEG for the detection steps that follow and always
; continues at mem_e801, whether or not the map could be read.
; ........................................................
mem_e820:
	MAP_SEG      EQU     0x800    ; O: 0x1000       ; BOTTOM OF STACK!
	TOT_RAM      EQU     0x0      ; O: 0xE000
	COUNTER      EQU     0x4      ; O: 0xE004

	mov ax, MAP_SEG
	mov es, ax
	mov di, COUNTER + 4		; es:di = buffer for the first map entry
	mov dword [es:COUNTER], 0x0	; no entries stored yet
	mov edx, 0x534D4150		; edx = 'SMAP'
	mov eax, 0x0000E820		; ax = e820 - high word of eax must be clear
	xor ebx, ebx			; continuation value: 0 = start of the map
	mov ecx, 0x14			; ask for 20 bytes (= 1 entry). some bios ignore it
					; and copy _always_ 20 bytes

mem_e820_loop:
	int 15h
	jc mem_e801			; error or "past the end" -> go on with extended memory
	cmp eax, 0x534D4150		; BIOS must answer with 'SMAP'
	jne mem_e801
	cmp ecx, 0x14			; only plain 20-byte entries are understood
	jne mem_e801

	; The entry just written to es:di is valid even when EBX came back as 0
	; (EBX = 0 only says "this was the last one"), so count it BEFORE looking
	; at EBX. The previous code dropped the final entry of every map.
	add di, 0x14			; next entry goes right behind this one
	inc dword [es:COUNTER]

	test ebx, ebx			; continuation 0 -> map complete
	jz mem_e801

	mov edx, eax			; eax still holds 'SMAP'; some BIOSes trash edx
	mov eax, 0x0000E820
	mov ecx, 0x14			; re-arm the buffer size for the next call
	jmp mem_e820_loop

; ........................................................
; Determines the total amount of RAM with int 15h / ax=E801h.
;
; The BIOS reports the memory between 1 MB and 16 MB in kB and the memory
; above 16 MB in 64 kB blocks, either in AX/BX or (when AX = BX = 0) in
; CX/DX. The sum plus the first MB is stored, in kB, at ES:TOT_RAM.
;
; Expects ES = MAP_SEG (set up by mem_e820). Falls back to mem_88 when the
; BIOS signals an error.
; ........................................................
mem_e801:				; check total physical amount of ram
	stc				; pre-set CF so an unsupporting BIOS reads as an error
	xor eax, eax
	xor ebx, ebx
	xor ecx, ecx
	xor edx, edx
	mov ax, 0xE801
	int 15h
	jc mem_88

	; Test "AX = BX = 0" without losing AX: the old "or ax, bx" overwrote AX
	; with AX|BX and that corrupted value was then used as the 1-16 MB size.
	push ax
	or ax, bx			; ZF = 1 only if both are zero
	pop ax				; pop leaves the flags untouched
	jnz mem_e801_ok
use_cd:					; some bios return ax = bx = 0
	mov ax, cx			; in this case cx and dx carry the values
	mov bx, dx
mem_e801_ok:
	movzx eax, ax			; kB between 1 MB and 16 MB, high word cleared
	movzx ebx, bx			; 64 kB blocks above 16 MB, high word cleared
	shl ebx, 6			; blocks * 64 = kB above 16 MB
	add eax, ebx			; kB above 1 MB
	add eax, 1024			; add first MB (system mapped memory)
					; => eax = amount of total RAM in kB
	mov dword [es:TOT_RAM], eax
	jmp ramcheck_done


;
; :: TODO :: 9:37 PM 4/20/2004 :: saberrider@users.sourceforge.net ::
;
; :
; : Remove int15h/ah=88h and implement CMOS Memory Detection
; :
;

; Last-resort RAM detection with int 15h / ah=88h, which returns the kB of
; extended memory above 1 MB in AX. The total (including the first MB) is
; stored in kB at ES:TOT_RAM; ES is expected to still be MAP_SEG.
mem_88:				; Implement int15h/ah=C7h to check ram above 16MB ???
	clc			; we arrive here with CF set by the failed E801 call and
				; not every BIOS clears it on success
	mov ah, 0x88
	int 15h
	jc bad_ram
	movzx ebx, ax		; widen BEFORE adding ...
	add ebx, 1024		; ... a 16-bit add wrapped around for AX > 0FBFFh
	mov dword [es:TOT_RAM], ebx
	jmp ramcheck_done

;
; EO ::TODO::
;

; RAM detection failed completely: tell the user and stop for good.
bad_ram:
	mov si, bad_ramcheck    ; give out a little error message
	call output_str16       ; that the ramcheck failed and
	cli                     ; halt the cpu
.hang:
	hlt
	jmp .hang               ; HLT is left again on NMI/SMI; never fall through
	                        ; into ramcheck_done after a failed check

; Remaining real-mode work after RAM detection: enable A20, load the kernel
; image, remember the cursor row for the protected-mode text output and switch
; the CPU to protected mode.
ramcheck_done:

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; End of RAM Detection
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::


	mov si, setting_a20_up		; output message: "Setting up A20..."
	call output_str16

	call enable_a20_16		; enable a20 line

; Originally enable_a20 got called BEFORE RAM Detection...

	mov si, loading_kernel		; output message: "Loading kernel..."
	call output_str16

	call load_kernel16		; load kernel to 1000h:0000h

	call shut_down_floppy_motor16	; shut down floppy motor again

	mov ah, 03h			; get cursor position (BIOS interrupt)
	mov bh, 00h			; of video page 0
	int 10h
	mov [output_row], dh		; save line number of cursor; the protected-mode
					; output code continues printing from this row

;
; :: JUMP to PMode ::
;

	mov si, jumping_pmode_msg	; output message: "Jumping to PMode..."
	call output_str16
	inc byte [output_row]		; the message above occupied one more line

	cli				; disable interrupts

	lgdt [gdt_reg]			; load GDT
	lidt [idt_reg_dummy]		; load a placeholder IDT; the real one is built and loaded in pmode

	mov eax,cr0			; set PE (Protected-mode-Enable) bit
	or al,1
	mov cr0,eax

	jmp dword SETUP_CODE_SEL:pmode		; do far (!!) jump (to reset CS and (E)IP) (because jump is from 16bit to 32bit we need "dword" --> NASM doc)


;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; data and 16bit functions
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::


; 0-terminated messages for output_str16 (13,10 = carriage return + line feed)
loading_kernel:		db "Loading kernel...",13,10,0
setting_a20_up:		db "Setting up A20...",13,10,0
jumping_pmode_msg:	db "Jumping to PMode...",13,10,0
bad_ramcheck: 		db "RAM Check failed!", 13, 10, 0
ram_done: 		db "RAM Check done", 13, 10, 0	; not referenced in the visible part of this file
bootdrive:		db 0				; BIOS drive number received in DL at start
error:			db "ERROR: SETUP-",0		; prefix printed by output_error16 in front of the error number


; ........................................................
; outputs a string in real mode using BIOS Interrupt 10h
;
; DS:SI = Adress of source string
; string must be 0-terminated
; CLEARS THE DIRECTION FLAG!
; ........................................................
; Prints the 0-terminated string at DS:SI with the BIOS teletype function
; (int 10h / ah=0Eh) on video page 0.
; AX and BX are preserved; SI ends up behind the terminating 0; DF is cleared.
output_str16:
	push ax
	push bx
	cld
	mov ah, 0Eh			; BIOS function: teletype output
	xor bh, bh			; page 0 - the page whose cursor the setup code
					; reads afterwards and whose memory (0B8000h) the
					; protected-mode output writes to. The old value 1
					; only worked on BIOSes that ignore BH.
	.entry:
		lodsb			; al = next character
		or al, al
		jz .end			; 0 terminates the string
		int 10h
		jmp .entry
	.end:				; (label now carries its colon)
		pop bx
		pop ax
		ret

; ........................................................
; enables the A20 line
; ........................................................
; code from a tutorial

; Switches the A20 gate on through the keyboard controller: command 0D1h
; ("write output port") goes to port 64h, the new output port value 0DFh to
; port 60h. Before and after each write the routine waits until the
; controller has emptied its input buffer.
; AX is preserved. Interrupts are disabled while talking to the controller
; and are ENABLED on return.
enable_a20_16:
	push ax
	cli

	call .wait_input_empty
	mov al, 0xD1			; command: write output port
	out 0x64, al

	call .wait_input_empty
	mov al, 0xDF			; output port value with the A20 bit set
	out 0x60, al

	call .wait_input_empty

	sti
	pop ax
	ret

	; spin until bit 1 (input buffer full) of the status port is clear
	.wait_input_empty:
		in  al, 0x64
		test al, 0x02
		jnz .wait_input_empty
		ret

; ........................................................
; shuts down the floppy motor.
; ........................................................
; Clears the upper four bits (the motor-on bits) of the value read from
; port 03F2h and writes the result back. AX and DX are preserved.
shut_down_floppy_motor16:
	push ax
	push dx
	mov dx, 03F2h		; floppy controller port
	in al, dx		; current value
	and al, 0Fh		; keep bits 0-3, drop all motor-on bits
	out dx, al		; write it back
	pop dx
	pop ax
	ret

; ........................................................
; loads the kernel from the bootdrive into memory at
; 1000h:0000h
; ........................................................

; Resets the boot drive (up to 10 attempts) and then reads KERNEL_LENGTH
; sectors, starting with sector number 10 as understood by read_fdd, to
; 1000h:0000h. If the reset never succeeds, error 01h is reported and the
; machine is halted.
; NOTE(review): the original comment called the start "sector 9" while the
; value passed is 10 - presumably 9 counted from zero; confirm against the
; disk layout.
load_kernel16:
	xor cx, cx			; attempt counter
	.try_reset:
		inc cx
		cmp cx, 11
		je .reset_failed	; 10 attempts used up
		xor ax, ax		; ah = 0: reset disk system
		mov dl, [bootdrive]	; the drive we were booted from
		int 13h
		jc .try_reset		; carry = failed, try again

	; drive is ready: fetch the kernel image
		mov ax, 1000h		; destination segment
		mov es, ax
		mov ax, 10		; first sector to read
		xor bx, bx		; destination offset
		mov cx, KERNEL_LENGTH	; number of sectors
		call read_fdd
		ret

	.reset_failed:
		mov ax, 01h		; error number 1: drive reset failed
		call output_error16
		jmp halt16


; ........................................................
; converts a logical sector number to CHS (only 1,44 MB floppy!!!)
;
; INPUT:
;	AX = logical sector number, counted from 1
;	     (1 = cylinder 0, head 0, sector 1). AX must not be 0.
;
; OUTPUT:
;	[absoluteSector] = sector value (1..SectorsPerTrack)
;	[absoluteTrack] = track (cylinder) value
;	[absoluteHead] = Head value
;	AX = track, DX = head (both clobbered)
;
; For every input the old code converted to a valid address the result is
; unchanged. The old code never added the 1 that CHS sector numbers need, so
; every multiple of SectorsPerTrack (18, 36, 54, ...) produced the invalid
; sector 0 on the following head instead of the last sector of the track.
; ........................................................
; code from VnutZ's Bootstrap-tut: http://www.geocities.com/mvea/bootstrap.htm
SectorsPerTrack:	dw 18
NumHeads:		dw 2
absoluteSector:		db 0
absoluteHead:		db 0
absoluteTrack:		db 0

     lba2chs:
          dec     ax				; make the sector number zero-based
          xor     dx, dx				; prepare dx:ax for operation
          div     word [SectorsPerTrack]		; ax = track*heads + head, dx = sector index in track
          inc     dl				; CHS sectors are numbered from 1
          mov     byte [absoluteSector], dl
          xor     dx, dx				; prepare dx:ax for operation
          div     word [NumHeads]			; ax = track, dx = head
          mov     byte [absoluteHead], dl
          mov     byte [absoluteTrack], al
          ret


; ........................................................
; reads an amount of sectors from floppy (drive 0), one sector per BIOS call
;
; INPUT:
;	AX = first sector (numbering as for lba2chs)
;	BX = offset of buffer
;	CX = amount of sectors (0 = nothing to do)
;	ES = segment of buffer
;
; Every sector is tried up to 10 times; the drive is reset between attempts.
; If a sector cannot be read, error 02h is printed and the machine is halted.
; DX is preserved; AX, BX and CX are clobbered.
;
; Fixes compared to the previous version:
;  - the carry flag of int 13h is checked (read errors used to be ignored)
;  - read_fdd_counter is a byte and is accessed as a byte (the word accesses
;    also read and rewrote the first code byte of this routine)
;  - the counter counts attempts per sector instead of sectors read
;
; NOTE(review): the drive number is fixed to 0 although load_kernel16 resets
; [bootdrive]; kept as it was because lba2chs is floppy-only anyway.
; ........................................................
read_fdd_amount:	dw 0
read_fdd_offset:	dw 0
read_fdd_start:		dw 0
read_fdd_counter:	db 0			; attempts made for the current sector

read_fdd:
	or cx, cx
	jnz .start
	ret					; nothing requested
	.start:
		push dx
		mov [read_fdd_amount], cx	; save values
		mov [read_fdd_offset], bx
		mov [read_fdd_start], ax
	.next_sector:
		mov byte [read_fdd_counter], 0	; fresh retry budget for every sector
	.read:
		inc byte [read_fdd_counter]
		cmp byte [read_fdd_counter], 11
		je .read_error			; 10 attempts failed
		push ax				; keep the sector number
		call lba2chs

		mov ch, [absoluteTrack]		; cylinder number
		mov dh, [absoluteHead]		; head number
		mov cl, [absoluteSector]	; sector number

		mov ah, 02h			; function = read sectors
		mov al, 1			; read one sector
		mov dl, 0			; read from fdd
		mov bx, word [read_fdd_offset]
		int 13h
		pop ax				; (pop does not change the carry flag)
		jnc .sector_ok

		push ax				; read failed: reset the drive, then retry
		xor ax, ax
		mov dl, 0
		int 13h
		pop ax
		jmp .read
	.sector_ok:
		add word [read_fdd_offset], 512	; next sector goes 512 bytes further
		inc ax
		mov bx, [read_fdd_amount]
		add bx, [read_fdd_start]	; bx = first sector NOT to read
		cmp bx, ax
		jne .next_sector
		pop dx
		ret
	.read_error:
		mov ax, 02h			; error number 2: sector read failed
		call output_error16
		jmp halt16


; ........................................................
; outputs an error message
;
; INPUT:
;	AX: error number
; ........................................................
; Prints "ERROR: SETUP-" followed by AX as four hex digits.
; SI is preserved. On return AX holds the caller's SI value and DI points to
; the conversion buffer of AXtoString16.
output_error16:
	push si
	mov si, error			; fixed prefix first
	call output_str16		; (AX survives this call)
	call AXtoString16		; DI -> hex text of the error number
	mov si, di
	call output_str16		; print the number
	pop si				; caller's SI back ...
	mov ax, si			; ... and, as before, a copy of it in AX
	ret

; Prints AX as four hex digits followed by a blank.
; Clobbers AX, SI and DI (AX and DI return the address of the conversion buffer).
output_AX:
	call AXtoString16		; DI -> hex text
	mov si, di
	mov ax, di			; the caller has always received the buffer address in AX
	call output_str16		; print it
	ret

; ........................................................
; converts the content of AX to a readable string in hex
; format
;
; INPUT:
;	AX	number to convert
;
; OUTPUT:
;	DS:DI	Adress of AXtoString_store
; ........................................................
AXtoString16_store:	db 0,0,0,0," ", 0	; four hex digits, a blank, terminating 0

; Writes AX as four upper-case hex digits (most significant first) into
; AXtoString16_store and returns the buffer address in DI.
; AX and BX are preserved.
AXtoString16:
	push bx
	mov di, AXtoString16_store
	.next_digit:
		rol ax, 4			; bring the next-highest nibble into bits 0-3
		mov bl, al
		and bl, 0Fh
		add bl, 30h			; 0..9 -> "0".."9"
		cmp bl, 39h
		jbe .store
		add bl, 7			; 10..15 -> "A".."F"
	.store:
		mov [di], bl
		inc di
		cmp di, AXtoString16_store + 4
		jne .next_digit			; after four rotations AX is back to its input value

	pop bx
	mov di, AXtoString16_store
	ret


; ........................................................
; halts the CPU
; ........................................................
halt16:
	cli
	.hang:
		hlt
		jmp .hang		; HLT ends on NMI/SMI; without the loop execution
					; would run on into the code that follows


; ........................................................
; copies memory from esi to edi (32-bit protected-mode routine)
; esi   pointer to source to copy        (read through DS)
; edi   pointer to copy-destination      (written through ES)
; ecx   amount of bytes to be copied     (0 copies nothing)
;
; On return esi and edi point behind the copied data and ecx is 0.
; All other registers are preserved. The direction flag is cleared.
;
; This routine is only called from the 32-bit code, but it used to be
; assembled as 16-bit code. Executed in a 32-bit segment every operand-size
; prefix then meant the opposite:
;  - the length was cut down to its lower 16 bits,
;  - "mov ebx, 4" decoded as "mov bx, 4" followed by "add [eax], al",
;    i.e. a stray write into memory,
;  - and "movsw" moved dwords (the effect the old comment puzzled about).
; It is now assembled for the mode it runs in.
; ........................................................
BITS 32

mem_copy:
	push eax

	cld				; copy upwards
	mov eax, ecx			; keep the byte count
	shr ecx, 2			; whole dwords first
	rep movsd
	mov ecx, eax
	and ecx, 3			; then the 0-3 remaining bytes
	rep movsb

	pop eax
	ret

;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; 32bit code
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

BITS 32

; First code executed in protected mode (reached by the far jump at the end of
; the real-mode part). It loads the data segment registers, writes the base of
; the kernel descriptors, fills the IDT, installs the int 21h and page-fault
; handlers, loads the IDT, enables interrupts and goes on to check_header.
pmode:
	xor edi,edi			; clear destination- and source-indexregister
        xor esi,esi

	mov ax,SETUP_DATA_SEL		; put descriptor offset of SETUP-Data-Descriptor in DS and SS
	mov ds,ax
	mov ss,ax
	mov esp, 09000h			; use 9000h as stack

	mov ax,LINEAR_SEL		; put LINEAR_SEL into es
	mov es,ax


; ------- SET GDT ADRESSES

	mov esi, setting_gdt32_msg	; output message: "Setting up GDT..."
	mov al, [output_row]		; AL = row
	mov ah, 00h			; AH = column 0
	mov bl, 07h			; attribute 07h
	push ds				; we need Data-Segment in FS for output_str
	pop fs
	call output_str
	inc byte [output_row]		; next line

; SET KERNEL ADRESSES (KERNEL_CODE_SEL + KERNEL_DATA_SEL)
; The base written into both kernel descriptors is (0h << 4) = 0, split over the
; three base fields of each descriptor (bytes 2-3, byte 4, byte 7).
	xor eax, eax
	mov ax, 0h

	shl eax, 4
	mov [gdt4 + 2],ax		; base 15:0
	mov [gdt5 + 2],ax
	shr eax,16
	mov [gdt4 + 4],al		; base 23:16
	mov [gdt5 + 4],al
	mov [gdt4 + 7],ah		; base 31:24
	mov [gdt5 + 7],ah


; ------- SET IDT ADRESSES


	mov esi, setting_idt32_msg	; output message: "Setting up IDT..."
	mov al, [output_row]
	mov ah, 00h
	mov bl, 07h
	push ds				; we need Data-Segment in FS for output_str
	pop fs
	call output_str
	inc byte [output_row]		; next line

; code converted from Linux 0.01; THX Linus ;)
; A gate is built in EAX (low dword: selector in bits 31:16, handler offset 15:0)
; and EDX (high dword: handler offset 31:16 in the upper half, 08E00h below).

	mov edx, int_unhandled		; redirect all interrupts to int_unhandled
	xor eax, eax
	mov ax, SETUP_CODE_SEL
	shl eax, 16
	mov ax, dx
	mov dx, 08E00h			; =interrupt gate
	mov ecx, 100          ; only the first 100 gates are filled (CHANGED by saberrider | ORIGINAL: mov ecx, 256)
	mov edi, idt
	idt_loop:
		mov [edi], eax
		mov [edi+4], edx
		add edi, 8		; every gate is 8 bytes long
		dec ecx
		jnz idt_loop

; EO code converted from linux

	mov edx, int21h			; our interrupt handler
	xor eax, eax
	mov ax, SETUP_CODE_SEL		; Segment of interrupt handler
	shl eax, 16
	mov ax, dx
	mov dx, 08E00h			; =interrupt gate
	mov edi, idt
	add edi, 21h*8			; our interrupt = 21h
	mov [edi], eax
	mov [edi+4], edx

	mov edx, int_pagehandle		; our interrupt handler
	xor eax, eax
	mov ax, SETUP_CODE_SEL		; Segment of interrupt handler
	shl eax, 16
	mov ax, dx
	mov dx, 08E00h			; =interrupt gate
	mov edi, idt
	add edi, 14*8			; page fault interrupt SR = 14
	mov [edi], eax
	mov [edi+4], edx


	lidt [idt_reg]			; load new IDT


	mov ax, LINEAR_SEL
	mov es, ax

	sti				; enable interrupts

	dec byte [output_row]		; just to correct it...

	mov esi, jumpingkernel32_msg	; use our new created interrupt :)
	mov ah, 01h			; function 01h = output string at DS:ESI
	int 21h

	inc byte [output_row]		; do one blank line
	inc byte [output_row]		; do one blank line

	jmp check_header		; skip the PE variables and bad_header

; PE stuff starts here.
; First we need some space to store few variables and constants

	KERNEL_HEAD	EQU	0x10000	; linear address of the raw kernel image (0x10000 = 64 KB, where
					; load_kernel16 put it as 1000h:0000h) - NOT the 1 MB destination
	PE_OFFSET	db 0,0,0,0	; Offset to PE-Header (dword, relative to KERNEL_HEAD)
	NO_SECTIONS	db 0,0		; Number of Sections in PE (word)
	SECTIONS_START	db 0,0,0,0   	; Pointer to Beginning of Section Headers (dword)
	ADDRESS		db 0,0,0,0      ; tmp Variable for Initializing Paging (dword)
	K_TEXT_L	db 0,0,0,0      ; Kernel .text Length (dword)
	K_DATA_L	db 0,0,0,0      ; Kernel .data Length (dword)
	K_TEXT_S	db 0,0,0,0      ; Kernel .text Start (dword, address inside the loaded image)
	K_DATA_S	db 0,0,0,0      ; Kernel .data Start (dword, address inside the loaded image)

; If there occurs an error while reading the header, give user a little information

; Reached whenever the kernel image does not look like the expected PE file:
; print the complaint through int 21h / ah=01h and stop the machine.
bad_header:
	mov ah, 01h			; int 21h function: output string
	mov esi, bad_head32_msg		; DS:ESI = message
	int 21h
	jmp halt			; never returns

; Validates the MZ and PE headers of the kernel image at KERNEL_HEAD and
; locates its section headers.
; Stores: [PE_OFFSET]      offset of the PE header inside the image
;         [NO_SECTIONS]    number of sections
;         [SECTIONS_START] address of the first section header
; Continues at check_section with EBX = address of the first section header;
; any failed check ends in bad_header.
check_header:

	; --- MZ part ---
	cmp word [KERNEL_HEAD], 0x5A4D		; "MZ" signature
	jne bad_header
	cmp word [KERNEL_HEAD+6], 0		; Number of Relocation Items must be 0
	jne bad_header
	cmp word [KERNEL_HEAD+0x18], 40h	; Offset to Relocation Table must be 40h,
	jne bad_header				; then there might be a PE-header ;-D

	; --- PE part ---
	mov ebx, [KERNEL_HEAD+0x3C]		; relative address of the PE header
	mov [PE_OFFSET], ebx
	add ebx, KERNEL_HEAD			; ebx -> PE header in memory
	cmp dword [ebx], 0x00004550		; signature "PE",0,0
	jne bad_header

	movzx eax, word [ebx+0x06]		; "Number of Sections" field
	mov [NO_SECTIONS], ax
	test ax, ax
	jz bad_header				; an image without sections is useless

	; --- section headers follow the optional header ---
	movzx eax, word [ebx+0x14]		; "Size of Optional Header" field
	lea ebx, [ebx+eax+0x18]			; optional header starts 18h behind the signature
	mov dword [SECTIONS_START], ebx
	jmp check_section

; Looks up the three kernel sections by their characteristics flags:
;   20h (code)               -> [K_TEXT_S] / [K_TEXT_L]
;   40h (initialized data)   -> [K_DATA_S] / [K_DATA_L]
;   80h (uninitialized data) -> its size (header offset 8) is stored at ES:501h
; "Start" is the section's raw-data pointer (header offset 14h) plus
; KERNEL_HEAD, "length" is the raw-data size (header offset 10h).
;
; Expects [NO_SECTIONS] and [SECTIONS_START] as set up by check_header.
; A missing section ends in bad_header.
;
; Every lookup now walks ALL section headers from the first one. The old code
; shared one counter that was counted down across the three searches and did
; not restart the third search, so a valid image could be rejected depending
; on the order of its sections. [NO_SECTIONS] is no longer modified.
check_section:
	cld					; stosd below must count upwards

	mov eax, 0x20				; CODE flag
	call pe_find_section
	mov eax, dword [ebx+0x14]		; position of .text inside the file ...
	add eax, KERNEL_HEAD			; ... made an address inside the loaded image
	mov dword [K_TEXT_S], eax
	mov eax, dword [ebx+0x10]
	mov dword [K_TEXT_L], eax

	mov eax, 0x40				; INITIALIZED_DATA flag
	call pe_find_section
	mov eax, dword [ebx+0x14]		; same procedure for .data
	add eax, KERNEL_HEAD
	mov dword [K_DATA_S], eax
	mov eax, dword [ebx+0x10]
	mov dword [K_DATA_L], eax

	mov eax, 0x80				; UNINITIALIZED_DATA flag
	call pe_find_section
	mov eax, [ebx+8]			; Store size of .BSS section
	mov edi, 501h				; right after current
	stosd					; screen line
	jmp pe_sections_done

; Returns in EBX the first section header whose characteristics (offset 24h)
; contain the flag given in EAX. Clobbers ECX. Does not return if there is
; no such section.
pe_find_section:
	movzx ecx, word [NO_SECTIONS]		; headers left to look at
	mov ebx, dword [SECTIONS_START]
	jecxz .missing
	.scan:
		test dword [ebx+0x24], eax	; is the wanted flag set?
		jnz .found
		add ebx, 0x28			; every section-header is 40 bytes long
		dec ecx
		jnz .scan
	.missing:
		add esp, 4			; drop our return address
		jmp bad_header
	.found:
		ret

pe_sections_done:

; Move the two kernel sections out of the loaded image to their final
; physical places and remember where the kernel wants to be entered.

; .text goes to 1 MB
	mov esi, dword [K_TEXT_S]
	mov ecx, dword [K_TEXT_L]
	mov edi, 0x100000
	call mem_copy

; .data goes to 1,5 MB
	mov esi, dword [K_DATA_S]
	mov ecx, dword [K_DATA_L]
	mov edi, 0x180000
	call mem_copy

; Copy Kernel done.

; The entry point is read from the PE file (MrSaint's solution) instead of
; simply jumping to the first byte of the code section: the field lies 28h
; bytes behind the start of the PE header, inside the "Optional Header".
; It is patched into the operand of the far jump at kernel_code.
	mov ebx, [PE_OFFSET]
	mov eax, dword [ebx + KERNEL_HEAD + 0x28]
	mov dword [kernel_code], eax

;::::::::::::::::::::;
;::: Setup Paging :::;
;::::::::::::::::::::;
;
; Builds six page tables and the page directory through ES:EDI.
;
;	table          located at   directory slot
;	#1023 system   0x2000       1023
;	#881  stack    0x3000       881
;	#882  .text    0x4000       882
;	#896  .data    0x5000       896
;	#910  heap     0x6000       910
;	#0    temp     0x200000     0
;	directory      0x7000
;
; Attribute bits used: 1 = present, 2 = read/write (3 = both).
; Relies on the direction flag being clear, exactly like the stosd loops this
; code replaces.

; mark %1 consecutive entries as unused (all zero)
%macro SETUP_PT_CLEAR 1
	xor eax, eax
	mov ecx, %1
	rep stosd
%endmacro

; map %2 consecutive 4 KB pages starting at physical address %1 with attributes %3
%macro SETUP_PT_MAP 3
	mov eax, (%1) | (%3)
	mov ecx, %2
    %%next_page:
	stosd
	add eax, 0x1000			; next physical page, same attributes
	dec ecx
    jnz %%next_page
%endmacro

; Page Table #1023
; > entries 768..1023 (the last MB of the virtual address space) show the
;   first physical MB, except for the part that is handed to the kernel stack

	mov edi, 0x2000
	SETUP_PT_CLEAR 768		; first 3 MB of the last 4 virtual MB are unused
	SETUP_PT_MAP 0x0, 8, 3		; physical 0x0 - 0x7FFF
	SETUP_PT_CLEAR 100		; leave out kernel stack space 0x8000 - 0x6BFFF
	SETUP_PT_MAP 0x6C000, 148, 3	; physical 0x6C000 - 0xFFFFF

; Page Table #881
; > Kernel Stack - the last 100 entries show the 100 physical pages from 0x8000 on

	mov edi, 0x3000
	SETUP_PT_CLEAR 924
	SETUP_PT_MAP 0x8000, 100, 3

; Page Table #882
; > kernel .text - the first 128 entries show physical 0x100000 - 0x17FFFF,
;   present but without the read/write bit

	mov edi, 0x4000
	SETUP_PT_MAP 0x100000, 128, 1
	SETUP_PT_CLEAR 896

; Page Table #896
; > kernel .data - the first 128 entries show physical 0x180000 - 0x1FFFFF

	mov edi, 0x5000
	SETUP_PT_MAP 0x180000, 128, 3
	SETUP_PT_CLEAR 896

; Page Table #910
; > kernel heap - the first 256 entries show physical 0x200000 - 0x2FFFFF

	mov edi, 0x6000
	SETUP_PT_MAP 0x200000, 256, 3
	SETUP_PT_CLEAR 768

; We need a temporary Page Table to set the paging bit. It is stored right at
; the beginning of kernel's heap, so it gets overwritten when the kernel is executed

; Page Table #0 (temp)
; > first four physical MB appear unchanged as the first four virtual MB

	mov edi, 0x200000
	SETUP_PT_MAP 0x0, 1024, 3

	mov dword [ADDRESS], 0x400000	; the scratch variable ends with the same value
					; the address-stepping loops used to leave in it

; Page Directory
; > every slot starts out as 2 (read/write, not present), then the six tables
;   built above are entered

	mov edi, 0x7000
	mov eax, 2
	mov ecx, 1024
	rep stosd

	mov dword [es:0x7000 +    0*4], 0x200000 | 3	; PT #0    -> [temp]
	mov dword [es:0x7000 + 1023*4], 0x2000 | 3	; PT #1023 -> "system"
	mov dword [es:0x7000 +  881*4], 0x3000 | 3	; PT #881  -> Stack
	mov dword [es:0x7000 +  882*4], 0x4000 | 3	; PT #882  -> .text
	mov dword [es:0x7000 +  896*4], 0x5000 | 3	; PT #896  -> .data
	mov dword [es:0x7000 +  910*4], 0x6000 | 3	; PT #910  -> heap (.bss)


    ; Let CPU know, where our Page Directory is located/Set CR3
	mov eax, 0x7000
	mov cr3, eax

	; Address of "flush" as seen through page table #1023, which shows physical
	; address 0 at virtual 0xFFF00000. Execution continues there once paging is on.
	mov edx, flush
	add edx, 0xFFF00000

    ; Enable Paging/Set Paging Bit in CR0
	mov eax, cr0
	or eax, $80000000
	mov cr0, eax
	jmp edx
    flush:
	mov eax, cr3			; rewrite CR3 with its own value
	mov cr3, eax

  ; patch IDT: store the high alias of the table as base and reload the register
  ; through the high alias of idt_reg
	mov eax, idt
	add eax, 0xFFF00000

	mov dword [idt_reg+2], eax

	mov dword eax, idt_reg
	add eax, 0xFFF00000

	lidt [eax]


  ; patch GDT: same procedure
	mov eax, gdt
	add eax, 0xFFF00000

	mov dword [gdt_reg+2], eax

	mov dword eax, gdt_reg
	add eax, 0xFFF00000

	lgdt [eax]

	; Directory slot 0 goes back to 2 (not present): the temporary mapping of the
	; first four MB disappears. The directory is written through its high alias.
	; NOTE(review): interrupts were enabled earlier in pmode and are not disabled
	; in the visible code, while the gates in the IDT hold low handler addresses
	; and ESP is 9000h - both lie in the range unmapped here. Confirm that no
	; interrupt can arrive before the kernel takes over.
	mov edi, 0xFFF07000
	mov eax, 2
	stosd

	mov eax, cr3
	mov cr3, eax

; End init Paging

; we use the Kernel-IDT space here to save the position where our cursor should be
; (the byte at virtual 0xFFF00500, i.e. physical 0x500, receives [output_row])

	mov edi, 0xFFF00500
	mov ebx, output_row
	add ebx, 0xFFF00000		; read the variable through its high alias
	mov byte al, [ebx]
	mov byte [es:edi], al

; Jump to Kernel
; The far jump is emitted by hand so that its offset operand has a label:
; the entry point read from the PE header is written to kernel_code before
; execution gets here.

    db 0xEA      			; 32 bit opcode ("jmp dword")
kernel_code:
    dd 0				; Relative address in Segment where we want to jump to
    dw KERNEL_CODE_SEL		; Kernel Code Segment


    jmp halt				; follows an unconditional far jump


;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; data and 32bit functions
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

; 0-terminated messages of the protected-mode part (no CR/LF: the row is
; tracked in output_row by the code that prints them)
jumpingkernel32_msg:	db "Jumping to kernel...",0
setting_gdt32_msg:	db "Setting up GDT...",0
setting_idt32_msg:	db "Setting up IDT...",0
bad_head32_msg		db "Bad kernel header signature!",0	; printed by bad_header


; ........................................................
; outputs a string in PMode by directly putting it into
; the Video-Buffer (0B8000h, 80 columns, 2 bytes per cell)
; CLEARS THE DIRECTION FLAG
;
; INPUT:
;	FS:ESI = adress of string (0-terminated)
;	AL = row of output
;	AH = column of output
;	BL = attribute byte, stored unchanged next to every character:
;
;		bit 7		blink
;		bits 6-4	background (colors 0-7)
;		bits 3-0	foreground (colors 0-15)
;
;		color values
;		0 black			8 dark grey
;		1 blue			9 bright blue
;		2 green			10 bright green
;		3 cyan			11 bright cyan
;		4 red			12 pink
;		5 magenta		13 bright magenta
;		6 brown			14 yellow
;		7 white			15 bright white
;
;		e.g.:
;			BL = 00000111b = 07h (white on black)
;			BL = 11000000b = C0h (black on red; blink)
;
; OUPUT:
;	AL = row of output
;	AH = column behind the last character written
;	ESI = address behind the terminating 0
;	all other registers are preserved
;
; The start position is 0B8000h + (row * 80 + column) * 2. The old code put
; the column into DH, so it was added as column * 256 and every call with a
; column other than 0 wrote to the wrong place.
; ........................................................
output_str:
	push edi
	push es
	push ebx
	push edx
	push ecx
	mov cx, ax			; save our row and column in cx: CL = row; CH = column
	cld
	mov ax, LINEAR_SEL		; the video buffer is reached through the linear segment
	mov es, ax
	movzx eax, cl			; row
	imul eax, 80			; * characters per line
	movzx edx, ch			; column, as a plain number
	add eax, edx
	shl eax, 1			; 2 bytes (character + attribute) per cell
	add eax, 0B8000h
	mov edi, eax			; es:edi = first cell to write
	.entry:
		mov al, [fs:esi]	; next character of the string
		inc esi
		or al, al		; if byte is 0 then exit routine
		jz .end
		inc ch			; increase column value
		mov ah, bl		; attribute goes into the byte behind the character
		stosw			; write both and step to the next cell
		jmp .entry
	.end:
		mov ah, ch
		mov al, cl
		pop ecx
		pop edx
		pop ebx
		pop es
		pop edi
		ret

; ........................................................
; halts the CPU
; ........................................................
halt:
	cli
	.hang:
		hlt
		jmp .hang		; HLT ends on NMI/SMI; without the loop the CPU
					; would start executing the GDT data that follows


;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; GDT
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

BITS 16

; this is loaded in the CPU register ( lgdt [gdt_reg] )
; Layout: 16-bit limit (size of the table in bytes minus 1), 32-bit base.
gdt_reg:
	dw gdt_end - gdt - 1	; GDT limit; the operands were swapped before, which
				; produced 0FFBFh instead of the real table size - 1
	dd gdt			; GDT base (replaced by the high alias after paging is enabled)

; P + DPL + S + type:
;
;	15  14    12  11          7
;	+---+-----+---+----------+
;	| P | DPL | S |   type   |
;	+---+-----+---+----------+
;	P = Present; DPL = Descriptor Privilege Level (ring 0 - ring 3); 
;	S = [Code- or Datasegment (bit set)] or system segment (bit cleared)
;	
; type:
;	DATA SEGMENT:
;
;	11  10   9   8   7
;	+---+---+---+---+
;	| T | E | W | A |
;	+---+---+---+---+
;	T = Code- or Data-Segment (T=0: Data; T=1: Code)
;	E = expand down (do so if bit is set); W = write enable (W=0: read only; W=1: read+write)
;	A = accessed
;
;	CODE SEGMENT:
;
;	11  10   9   8   7
;	+---+---+---+---+
;	| T | C | R | A |
;	+---+---+---+---+
;	T = Code- or Data-Segment (T=0: Data; T=1: Code)
;	C = conforming; R = read enable (R=0: execute only; R=1: execute+read)
;	A = accessed
;
;	SYSTEM SEGMENT:
;	type = 0010b (LDT entry)
;	type = x0x1b (TSS entry)
;	type = x100b (Call Gate)
;	type = 0101b (Task Gate)
;	type = x110b (Interrupt Gate)
;	type = x111b (Trap Gate)
;	type = x000b (reserved)
;	type = 1010b (reserved)
;	type = 1101b (reserved)
;
;
;
;
; limit 19:16 + flags:
;
;	23  22  21  20  19           16
;	+---+---+---+---+------------+
;	| G | D | 0 | A*| limit 19:16|
;	+---+---+---+---+------------+
;	G = granularity (G=0: limit in bytes; G=1: limit in 4KBytes)
;	D = 16bit or 32bit (D=0: 16bit; D=1 32bit)
;	A* = AVL = Available

; this is the GDT itself: 8 descriptors of 8 bytes each (gdt .. gdt_end).
; Every xxx_SEL constant is the byte offset of its descriptor inside the
; table, i.e. the selector value with RPL = 0 and TI = 0.
; As assembled, all non-null descriptors are flat: base 0, limit 0xFFFFF
; with G = 1 (4 GiB), 32 bit, DPL 0.
;
; null descriptor (selector 00h)
gdt:	dw 0			; limit 15:0
	dw 0			; base 15:0
	db 0			; base 23:16
	db 0			; P + DPL + S + type (look above)
	db 0			; limit 19:16, flags (look above)
	db 0			; base 31:24

; linear data segment descriptor (flat view of memory, e.g. for the video buffer)
LINEAR_SEL	equ	$-gdt
gdt1:	dw 0xFFFF		; limit 15:0 (full limit 0xFFFFF)
	dw 0			; base 15:0
	db 0			; base 23:16
	db 10010010b		; P=1, DPL=0, S=1, type=0010b (data, read+write)
        db 11001111b		; G=1 (4 KByte units), D=1 (32bit), AVL=0, limit 19:16 = 1111b
	db 0			; base 31:24

; SYS_CODE_SEL code segment descriptor
; NOTE(review): the original comment called this the "user" code segment,
; but DPL is 0 (ring 0) - confirm the intent
SYS_CODE_SEL	equ	$-gdt
gdt2:   dw 0xFFFF               ; limit 15:0 (full limit 0xFFFFF)
	dw 0			; base 15:0
	db 0			; base 23:16
	db 10011010b		; P=1, DPL=0, S=1, type=1010b (code, execute+read)
        db 11001111b		; G=1 (4 KByte units), D=1 (32bit), AVL=0, limit 19:16 = 1111b
	db 0			; base 31:24

; SYS_DATA_SEL data segment descriptor
; NOTE(review): the original comment called this the "user" data segment,
; but DPL is 0 (ring 0) - confirm the intent
SYS_DATA_SEL	equ	$-gdt
gdt3:   dw 0xFFFF               ; limit 15:0 (full limit 0xFFFFF)
	dw 0			; base 15:0
	db 0			; base 23:16
	db 10010010b		; P=1, DPL=0, S=1, type=0010b (data, read+write)
        db 11001111b		; G=1 (4 KByte units), D=1 (32bit), AVL=0, limit 19:16 = 1111b
	db 0			; base 31:24

; kernel code segment descriptor
KERNEL_CODE_SEL	equ	$-gdt
gdt4:   dw 0xFFFF               ; limit 15:0 (full limit 0xFFFFF)
	dw 0			; base 15:0
	db 0			; base 23:16
	db 10011010b		; P=1, DPL=0, S=1, type=1010b (code, execute+read)
        db 11001111b		; G=1 (4 KByte units), D=1 (32bit), AVL=0, limit 19:16 = 1111b
	db 0			; base 31:24

; kernel data segment descriptor
KERNEL_DATA_SEL	equ	$-gdt
gdt5:   dw 0xFFFF               ; limit 15:0 (full limit 0xFFFFF)
	dw 0			; base 15:0
	db 0			; base 23:16
	db 10010010b		; P=1, DPL=0, S=1, type=0010b (data, read+write)
        db 11001111b		; G=1 (4 KByte units), D=1 (32bit), AVL=0, limit 19:16 = 1111b
	db 0			; base 31:24

; SETUP code segment descriptor
; NOTE(review): base is 0 as assembled; whether it is patched at run time
; (setup.s is loaded to 0500h:0000h) cannot be seen here - confirm
SETUP_CODE_SEL	equ	$-gdt
gdt6:   dw 0xFFFF               ; limit 15:0 (full limit 0xFFFFF)
	dw 0			; base 15:0
	db 0			; base 23:16
	db 10011010b		; P=1, DPL=0, S=1, type=1010b (code, execute+read)
        db 11001111b		; G=1 (4 KByte units), D=1 (32bit), AVL=0, limit 19:16 = 1111b
	db 0			; base 31:24

; SETUP data segment descriptor (loaded into DS by the int 21h handler)
; NOTE(review): base is 0 as assembled - see the note on the SETUP code descriptor
SETUP_DATA_SEL	equ	$-gdt
gdt7:   dw 0xFFFF               ; limit 15:0 (full limit 0xFFFFF)
	dw 0			; base 15:0
	db 0			; base 23:16
	db 10010010b		; P=1, DPL=0, S=1, type=0010b (data, read+write)
        db 11001111b		; G=1 (4 KByte units), D=1 (32bit), AVL=0, limit 19:16 = 1111b
	db 0			; base 31:24

gdt_end:


;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; data and 32bit functions (needed for IDT)
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

BITS 32


; screen row (0 = top) used by the int 21h / AH=01h handler for its most
; recent output; the handler increments it before every print and sets it
; back to 24 whenever it reaches 25 (i.e. when the screen has to scroll)
output_row:	db 0


; ........................................................
; sets the hardware text cursor position
;
; INPUT:
;	AL = row    (0 = top row)
;	AH = column (0 = leftmost column)
; OUTPUT:
;	none - all registers and EFLAGS are preserved
;
; The interrupt flag is saved and restored instead of being
; forced on at exit, so a caller that runs with interrupts
; disabled (e.g. the int 21h handler) stays that way.
; ........................................................
SetCursorPos:
	pushfd					; remember the caller's interrupt flag
	cli					; keep the index/data port writes together
	push eax
	push edx
	push ebx

	movzx ebx, al				; EBX = row
	imul ebx, ebx, 80			; every line has 80 columns
	movzx eax, ah				; EAX = column
	add ebx, eax				; EBX = cell offset (at most 255*80+255, fits in BX)

	mov edx, 03D4h				; CRT controller index port
	mov al, 14				; select register 14: cursor position, high byte
	out dx, al
	inc edx					; 03D5h = CRT controller data port
	mov al, bh
	out dx, al				; set it

	dec edx					; back to the index port
	mov al, 15				; select register 15: cursor position, low byte
	out dx, al
	inc edx
	mov al, bl
	out dx, al				; set it

	pop ebx
	pop edx
	pop eax
	popfd					; restores IF exactly as the caller had it
	ret

; ........................................................
; scrolls the content of the text screen up by one line
;
; Lines 2-25 of the 80x25 text buffer at linear 0B8000h are
; moved up to lines 1-24; the freed bottom line is filled
; with blanks (attribute 07h) so that no stale text of the
; old last line remains visible.
;
; INPUT:  none
; OUTPUT: none - all registers (including ECX) and EFLAGS
;         are preserved
; ........................................................
ScrollOneLine:
	pushfd				; CLD below must not leak to the caller
	push eax
	push ecx			; REP counter - must not be lost for the caller
	push ds
	push es
	push esi
	push edi

	mov ax, LINEAR_SEL		; flat segment: offsets are linear addresses
	mov ds, ax			; MOVS reads from DS:ESI ...
	mov es, ax			; ... and writes to ES:EDI
	cld				; copy with ascending addresses

	mov esi, 0B80A0h		; start of line 2 (one line = 80 * 2 = 0A0h bytes)
	mov edi, 0B8000h		; start of line 1
	mov ecx, 960			; 24 lines * 160 bytes = 3840 bytes = 960 dwords
	rep movsd

	mov eax, 07200720h		; two cells: blank (20h) with attribute 07h
	mov ecx, 40			; one line = 160 bytes = 40 dwords; EDI is at line 25 now
	rep stosd

	pop edi
	pop esi
	pop es
	pop ds
	pop ecx
	pop eax
	popfd
	ret



;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; Interrupt Handler
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

; int 21h service handler
;
; INPUT:
;	AH = function number
;	     01h: print a string on the next screen row; DS:ESI = string
; OUTPUT:
;	AL = 01h on success, 00h for an unknown function
;
; NOTE(review): function 01h leaves FS set to the caller's DS and does
; not preserve AH.
int21h:
	cli					; disable interrupts
	cmp ah, 01h
	jne .function_unknown			; 01h is the only function implemented

	; ---- function 01h: output a string ----
		push ebx
		push ds				; caller's DS, restored before returning
		push ds				; FS = DS: output_str takes the string's segment in FS
		pop fs
		mov ax, SETUP_DATA_SEL		; switch to our own data segment (for output_row)
		mov ds, ax

		inc byte [output_row]		; advance to the next row
		cmp byte [output_row], 25	; still on the screen?
		jne .print
		call ScrollOneLine		; no: scroll, then reuse the bottom row
		mov ax, 0018h			; AL = row 24, AH = column 0
		call SetCursorPos
		mov byte [output_row], 24

	.print:
		movzx ax, byte [output_row]	; AL = row, AH = column 0
		mov bl, 07h			; "white" on black
		call output_str
		call SetCursorPos		; AX is passed on as returned by output_str
		mov al, 01h			; return code: 01h = success
		pop ds
		pop ebx
		jmp .done

	.function_unknown:
		xor al, al			; return code: 00h = ERROR

	.done:
		sti				; enable interrupts again
		iret

; handler that only reports failure: returns AL = 00h (ERROR) to the
; interrupted code. Presumably installed for vectors that have no
; handler of their own - confirm against the IDT setup code.
int_unhandled:
	cli
	xor al, al				; AL = 00h: return code "ERROR"
	sti
	iret

;
; :: Page Fault ISR :: 9:50 PM 4/20/2004 :: saberrider
;
; > Implemented for debugging purposes during the nucleOS memory management
;   redesign
;
; > According to the original note it may only be invoked by the system
;   in the window after lidt (following the jump to protected mode) and
;   before paging is enabled
;
; This is a debug trap, not a recovering handler: it loads the fault
; details into registers and then halts the CPU so they can be inspected:
;	EAX = return EIP pushed by the CPU (address of the faulting instruction)
;	EBX = page fault error code
;	ECX = CR2 (linear address that caused the fault)
;	EDX = 0x2A5EFA17 (presumably a recognisable marker value - confirm)
; The interrupted code's EAX, EBX, ECX and EDX are therefore NOT preserved.
;

int_pagehandle:
	push ebp
	mov ebp, esp		; frame: [ebp] = old EBP, [ebp+4] = error code, [ebp+8] = EIP
        pushad			; saved copies are never popped; "leave" below discards them
        sti			; NOTE(review): re-enables interrupts inside the fault handler - purpose unclear
        mov eax, [ebp+8]	; EAX = EIP of the faulting instruction
        mov ebx, [ebp+4]	; EBX = error code
        mov ecx, cr2		; ECX = faulting linear address

;        popad               ;// (disabled) would restore the registers and wipe the debug values above
        leave               ;// -> mov esp, ebp | pop ebp  (also drops the pushad block)
        add esp, 4          ;// remove the error code so that ESP points at the EIP/CS/EFLAGS frame for iret
	mov edx, 0x2A5EFA17	; marker value in EDX (see header)
	cli
	hlt			; stop here with the debug values in EAX/EBX/ECX/EDX
        iret;                // only reached if the CPU leaves HLT (e.g. NMI): return to the faulting code



;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
; IDT
;:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::

; all-zero IDTR image (limit 0, base 0); where it is loaded is not visible
; in this part of the file
idt_reg_dummy:		; dummy IDTR
	dw 0		; limit
	dd 0		; base

; pseudo-descriptor for the real IDT: word limit followed by dword base
idt_reg:
	dw idt_end - idt - 1	; IDT limit = table size in bytes - 1
	dd idt			; IDT base

; the IDT itself: room for 100 gate descriptors of 8 bytes (vectors 0-99),
; all zero as assembled; the gates are presumably filled in at run time
idt: times 100*8 db 00h  	; CHANGED BY saberrider | ORIGINAL: idt: times 256*8 db 00h
idt_end:

; IDT was shrunk from 256 to 100 entries because of the size of setup.s

; pad the image with 90h (NOP) bytes up to its fixed length;
; the count becomes negative (assembly error) if the code above outgrows 4096 bytes
times 4096-($-$$) db 90h ; file length: 4096 bytes = 4.0 KByte = 8 x 200h bytes = 8 sectors
