;
; Raw Game Engine
; Copyright (C) 2023 Ernest Deak
;
; This program is free software: you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation, either version 3 of the License, or
; (at your option) any later version.
;
; This program is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program. If not, see https://www.gnu.org/licenses/.
;
BITS 64
DEFAULT REL

%include "inc/internal.inc"
%include "include/rge.inc"
%include "inc/rge.inc"

extern RG_Subsys_Img_init
extern RG_Subsys_TTF_init

section ro_section
;; NUL-terminated strings (10 = line feed)
astr: db 'Starting RGE...',0
fmt: db "%s",10,0
;; Format string used by RG_DebugPrint
dbgfmt: db "RGE [DBG] %s",10,0
;; Default window title, used by RG_Init when Title_ptr is still 0
window_name: db 'RGE',0

section .bss
;; Global variable - pointer to the string used as the window title
Title_ptr: resq 1
;; Global variable - location for the main renderer
MainRenderer: resq 1
;; Global variable - location for the main window
MainWindow: resq 1
;; Width of window (stored as a qword)
MainWindow.w: resq 1
;; Height of window (stored as a qword)
MainWindow.h: resq 1
;; X position of window (stored as a qword)
MainWindow.x: resq 1
;; Y position of window (stored as a qword)
MainWindow.y: resq 1
;; Camera texture target pointer
;; NOTE: The camera is really a large texture onto which everything is
;; rendered prior to being presented into the main window, based on the
;; source-rectangle slice which is moved around to follow the player
Camera: resq 1
;; Camera implicit SDL_rect structure: four consecutive dwords (x, y, w, h)
;; Camera X position
Camera.x: resd 1
;; Camera Y position
Camera.y: resd 1
;; Camera/level width
Camera.w: resd 1
;; Camera/level height
Camera.h: resd 1
;; Flag checked by the main loop: the engine keeps running while non-zero
RG.gRunning: resb 1
;; Misc. flags. Reserved.
RG.gFlags: resb 1
;; Control flags, like pause and others (layer switching?)
;; Non-zero: RG_Update skips the call to game_update
RG.pause_update: resb 1
;; Non-zero: RG_Update skips the per-object update pass
RG.pause_obj_update: resb 1
RG.pause_render: resb 1
RG.pause_mainloop: resb 1

section code_section

;; Initializes the engine.
;; Sets things up with SDL_Init, creates a window, a renderer and the
;; camera render-target texture.
;;
;; The results are stored in the MainWindow, MainRenderer and Camera .bss
;; variables. Title_ptr, MainWindow.{x,y,w,h} and Camera.{w,h} must have
;; been configured beforehand (see the RG_set_* helpers).
;;
;; @return rax = pointer to the camera texture. Every failure path calls
;;         RG_Abort / RG_AbortWithSDLError instead of returning.
;;
RG_Init:
    funcstart 2
    stksetup

    exmcall SDL_Init, SDL_INIT_VIDEO
    ; SDL_Init returns a C int: only eax is meaningful
    test eax, eax
    ccall nz, RG_AbortWithSDLError

    mov rax, [Title_ptr]
    test rax, rax
    jnz .title_ok
    ; no title supplied, use default
    lea rax, [window_name]
    mov [Title_ptr], rax
.title_ok:

    ; Check the window geometry. Every load needs its own `test`:
    ; `mov` does not touch the flags, so a bare jz/jnz after it would
    ; re-use the result of an earlier test.
    ; NOTE(review): 0 is treated as "not set" for the position as well,
    ; so a window placed at x=0 or y=0 is rejected - confirm this is wanted.
    mov rax, [MainWindow.x]
    test rax, rax
    jz %$window_dim_not_set
    mov rax, [MainWindow.y]
    test rax, rax
    jz %$window_dim_not_set
    mov rax, [MainWindow.w]
    test rax, rax
    jz %$window_dim_not_set
    mov rax, [MainWindow.h]
    test rax, rax
    jnz %$window_dim_ok
%$window_dim_not_set:
    mcall RG_DebugPrint, "Window dimensions not set!"
    call RG_Abort
%$window_dim_ok:

    ; Check the camera size. Camera.w / Camera.h are dwords, so they are
    ; loaded as 32-bit values; a 64-bit load would also pull in the
    ; neighbouring variable and hide a zero.
    mov eax, [Camera.w]
    test eax, eax
    jz %$camera_dim_not_set
    mov eax, [Camera.h]
    test eax, eax
    jnz %$camera_dim_ok
%$camera_dim_not_set:
    mcall RG_DebugPrint, "Camera dimensions not set!"
    call RG_Abort
%$camera_dim_ok:

    ; window flags: 4 (SDL_WINDOW_SHOWN in the SDL2 headers)
    exmcall SDL_CreateWindow, [Title_ptr],\
        [MainWindow.x], [MainWindow.y],\
        [MainWindow.w], [MainWindow.h],\
        4
    ; rax = SDL_Window*, or 0 on failure
    mov [MainWindow], rax
    test rax, rax
    ccall z, RG_AbortWithSDLError ; abort if rax is 0

    ; renderer flags: 4 | 2 (PRESENTVSYNC | ACCELERATED in the SDL2 headers)
    exmcall SDL_CreateRenderer, [MainWindow], -1, 4 | 2
    mov [MainRenderer], rax
    test rax, rax
    ccall z, RG_AbortWithSDLError ; abort if rax is 0

    ; create camera texture target
    exmcall SDL_CreateTexture, [MainRenderer], SDL_PIXELFORMAT_RGBA32,\
        SDL_TEXTUREACCESS_TARGET, [Camera.w], [Camera.h]
    test rax, rax
    ccall z, RG_AbortWithSDLError
    mov [Camera], rax ; camera texture pointer, should be larger than destination

    ; set camera as rendering target
    exmcall SDL_SetRenderTarget, [MainRenderer], [Camera]

    ; everything initialized, return indicating success
    ; with a pointer to the camera texture
    mov rax, [Camera]
    funcend

;; Main update step.
;; Not called by RG_MainLoop when RGE_OPTIM_UPDATE_DURING_RENDER is set to 1.
;; @param areg1 - delta time (integer)
;; @param xmm1  - delta time (float)
RG_Update:
    funcstart
    var 8, %$dt, %$float_dt
    stksetup

    ; Save both dt values first: the object pass below reloads them even
    ; when the game update itself is paused.
    mov [%$dt], areg1
    movq [%$float_dt], xmm1

    movzx r11, byte [RG.pause_update]
    test r11, r11
    jnz %$skip_game_update
    ; areg1 / xmm1 still hold dt here
    call game_update
%$skip_game_update:

    movzx r11, byte [RG.pause_obj_update]
    test r11, r11
    jnz %$funcend ; skip update of objects while paused

    ; update objects
    mov areg1, [%$dt]
    movq xmm1, [%$float_dt]
    call RG_ObjectList_Process
%$funcend:
    funcend

;; Loops through the object list layer by layer and renders every
;; allocated, visible object.
;; For each layer the bitmask in RG_OList_Bitmask marks allocated slots;
;; slot i of a layer lives at
;;   RG_Objects + layer * RG_OBJECT_SIZE + i * RG_STRIDE.
;; game_object_render is called for each visible object before it is drawn
;; with SDL_RenderCopyExF.
;; @param areg1 - delta time (integer)
;; @param xmm1  - delta time (float), kept in the saveregs slot
RG_RenderObjects:
    funcstart
    saveregs xmm0,xmm1,rbx,r12,r13,r14,r15,rdx
    var 8, %$layer, %$dt, %$max_idx, %$instance
    stksetup

    mov [%$dt], areg1
%if RGE_OPTIM_UPDATE_DURING_RENDER
    ; areg1 set to dt
    exmcall game_update
    ; restore areg1 just in case (load it back; never overwrite the saved dt)
    mov areg1, [%$dt]
%endif
    mov qword [%$layer], 0

%define %$reg_bitidx r14
%define %$reg_instance r11

.loop_main:
    xor %$reg_bitidx, %$reg_bitidx

    ; rbx = address of slot 0 of the current layer
    mov rax, RG_OBJECT_SIZE
    mov r11, [%$layer]
    mul r11
    lea rbx, [RG_Objects]
    add rbx, rax ; add start offset of stride

    ; calculate bitmask offset
    mov rax, 8 ; bytes per layer
    mov r13, [%$layer]
    mul r13
    mov r13, rax ; r13 = bitmask offset

    ; calculate max index
    lea r12, [RG_OList_Bitmask]
    add r12, r13
    mov r12, [r12] ; r12 = bitmask for layer
    bsr rax, r12 ; rax = max index
    ; bsr leaves rax undefined (and sets ZF) when the mask is 0:
    ; nothing is allocated in this layer, so move on
    jz .next_layer
    ; store to variable
    mov [%$max_idx], rax

.loop_layer:
    ; reload the mask every iteration (r12 is not assumed to hold it)
    lea r12, [RG_OList_Bitmask]
    add r12, r13
    mov r12, [r12]
    bt r12, %$reg_bitidx
    jnc .next ; bit clear: slot not allocated

%if RGE_OPTIM_UPDATE_DURING_RENDER
    lea areg1, [rbx] ; object pointer
    mov areg2, [%$dt]
    movdqa xmm1, [%$saved_xmm1] ; float dt, may have been clobbered by earlier calls
    exmcall game_object_handler
%endif

    ; fetch object from list again
    ; (maybe could leave this out since rbx should be preserved
    ; across calls)
    lea %$reg_instance, [rbx]
    mov [%$instance], %$reg_instance

    ; NOTE(review): this access does not add RG_ANIM_SPRITE_OFFSET although
    ; the srcrect/dstrect/flip/img accesses below do - confirm intended.
    mov r10, [AnimSprite(%$reg_instance, visible)]
    test r10, r10 ; check if .visible flag is 0
    jz %$end_render_call ; skip rendering if 0

    ; else continue and render the sprite, but first call
    ; game_object_render so the calling code can do custom rendering
    mov areg1, rbx ; pass full pointer
    mov areg2, [%$dt] ; pass in dt as well
    ; reload the float dt: a previous iteration's calls may have clobbered xmm1
    movdqa xmm1, [%$saved_xmm1]
    call game_object_render

    mov %$reg_instance, [%$instance] ; restore r11
    mov rbx, [%$instance]

    lea areg3, [AnimSprite(%$reg_instance,srcrect) + RG_ANIM_SPRITE_OFFSET]
    lea areg4, [AnimSprite(%$reg_instance,dstrect) + RG_ANIM_SPRITE_OFFSET]
    mov areg5, 0 ; the point
    mov areg6, [AnimSprite(%$reg_instance,flip) + RG_ANIM_SPRITE_OFFSET] ; flip
    xorpd xmm0, xmm0 ; angle = 0
    ; areg6 is actually the flip parameter because the angle is a double
    ; and the point a pointer to SDL_FPoint
    exmcall SDL_RenderCopyExF, [MainRenderer], [AnimSprite(%$reg_instance, img) + RG_ANIM_SPRITE_OFFSET]
    movapd xmm0, [%$saved_xmm0] ; restore xmm0
%$end_render_call:

.next:
    inc %$reg_bitidx
    cmp %$reg_bitidx, qword [%$max_idx]
    jg .next_layer
    ; index <= max index: stay in this layer
    add rbx, RG_STRIDE
    jmp .loop_layer

.next_layer:
    inc qword [%$layer]
    mov r13, [%$layer]
    xor %$reg_bitidx, %$reg_bitidx
    cmp r13, RGE_MAX_LAYERS
    jnz .loop_main

.loop_layer_end:
    ; if here, layer loop is over
.loop_main_end:
    funcend
;FIXME: need to figure out how to deal with bitmasks longer than 64 bits
; as that allows only for 64 elements per layer

;; Calls the object handler for each allocated object in every layer,
;; with the object pointer as the first parameter and dt as the second.
;; The layer bitmask is read once per layer, before its handlers run.
;; @param areg1 - delta time (integer)
;; @param xmm1  - delta time (float)
RG_ObjectList_Process:
    funcstart
    saveregs rbx, r12
    var 8, %$count, %$max_idx, %$layer_count, %$cur_bitmask, %$dt, %$offset, %$float_dt
    stksetup

    movq [%$float_dt], xmm1
    mov [%$dt], areg1
    mov qword [%$layer_count], 0

.layer_loop:
    ; rbx = bitmask of the current layer
    mov rax, [%$layer_count]
    mov r11, 8 ; 8 bytes per layer
    mul r11
    lea rbx, [RG_OList_Bitmask]
    add rbx, rax
    mov rbx, [rbx]
    mov [%$cur_bitmask], rbx

    bsr rax, rbx
    jz .layer_next ; if nothing is allocated, move to next layer
    ; rax = largest index (most significant bit set)
    ; = max index of update loop
    mov [%$max_idx], rax
    ; our counter
    mov qword [%$count], 0

    ; add layer offset stride
    mov rax, [%$layer_count]
    mov r11, RG_OBJECT_SIZE
    mul r11 ; rax = initial offset
    lea rbx, [RG_Objects]
    add rbx, rax ; rbx = index 0 + initial stride offset
    mov [%$offset], rbx

.loop:
    mov rbx, [%$cur_bitmask]
    mov rax, [%$count]
    bt rbx, rax
    jnc .next ; slot not allocated
    mov rbx, [%$offset]

    ; NOTE: EXPERIMENTAL PREFETCHING
    ;-------------------------------
    ; prefetcht2 [rbx+RG_STRIDE+(RG_STRIDE*7)]
    ;-------------------------------

    ; NOTE:
    ; The game itself is responsible for handling classes and
    ; other ways of object categorization to call the appropriate
    ; functions. We pass in just the pointer
    mov areg2, [%$dt]
    lea areg1, [rbx]
    movq xmm1, [%$float_dt]
    call game_object_handler

.next:
    add qword [%$offset], RG_STRIDE
    mov r11, [%$count]
    mov rax, [%$max_idx]
    inc r11
    mov [%$count], r11 ; increment count
    cmp r11, rax
    jle .loop

    ; all indexes in the current layer are processed:
    ; increment layer index
.layer_next:
    mov rax, [%$layer_count]
    inc rax
    mov [%$layer_count], rax
    cmp rax, RGE_MAX_LAYERS
    jl .layer_loop
.end:
    funcend

;; Allocates a memory slot and copies the whole struct into it.
;; The lowest free slot of the layer is claimed in RG_OList_Bitmask and
;; RG_OBJECT_SIZE bytes are copied from the source with memcpy.
;; @param areg1 - pointer to a structure of data (on the stack ideally)
;; @param areg2 - layer to place it in
;; @return rax = pointer to the newly allocated slot (memcpy's return
;;         value), or 0 when all 64 slots of the layer are in use
RG_Alloc:
    funcstart
    saveregs rdx, rbx, r12
    stksetup

    ; rbx = address of this layer's bitmask (8 bytes per layer)
    mov rax, 8
    mul areg2
    lea rbx, [RG_OList_Bitmask]
    add rbx, rax

    ;
    ; find and claim the lowest free slot
    ;
    mov r11, [rbx]
    not r11 ; inverted bitmask: set bits are free slots
    xor eax, eax ; return 0 unless a free slot is found
    test r11, r11
    jz .done ; layer full: bsf on 0 would give an undefined index
    bsf rax, r11 ; rax = index of lowest free slot
    not r11 ; back to the real bitmask
    bts r11, rax ; mark the slot as allocated
    mov [rbx], r11
    ; move index into r11
    mov r11, rax

    ;
    ; now allocate at the index held in r11
    ;
.alloc:
    lea r10, [RG_Objects]
    ; layer start offset
    mov rax, RG_OBJECT_SIZE
    mul areg2
    add r10, rax
    ; index * stride
    mov rax, RG_STRIDE
    mul r11
    add r10, rax ; r10 = destination slot

    mov areg2, areg1 ; source
    mov areg1, r10 ; destination
    mov areg3, RG_OBJECT_SIZE
    exmcall memcpy
    ; return same pointer to the newly allocated area
    ; rax = dest ptr
.done:
    funcend

;; Deallocates a memory slot by clearing its bit in the layer bitmask.
;; A null pointer, or a pointer whose computed index is outside the
;; 64-slot bitmask, is ignored.
;; @param areg1 - pointer as returned by RG_Alloc
;; @param areg2 - layer
;; @return undefined
RG_Dealloc:
    funcstart
    saveregs rbx, rdx, r13, r12
    stksetup

    ; if areg1 is 0 there is nothing to deallocate.
    ; This should be handled by the caller, but it is more stable like this
    test areg1, areg1
    jz %$end

    ; calculate bitmask stride
    mov rax, 8
    mul areg2
    mov r12, rax ; r12 = layer * 8

    mov rax, RG_OBJECT_SIZE
    mul areg2 ; rax = layer start offset
    lea rbx, [RG_Objects]
    add rbx, rax ; rbx = address of slot 0 of the layer

    ; index = (pointer - slot 0) / RG_STRIDE
    ; memory is made of equally sized chunks + layers which we call a stride,
    ; RGE_MAX_LAYERS*RG_OBJECT_SIZE bytes
    mov rax, areg1
    sub rax, rbx
    xor rdx, rdx
    mov rbx, RG_STRIDE
    div rbx ; rax = index

    ; `btr [mem], reg` with an index >= 64 would modify memory outside
    ; this layer's 64-bit mask, so reject such pointers
    cmp rax, 64
    jae %$end

    ; clear the bit at that index in the layer's mask
    lea r11, [RG_OList_Bitmask]
    add r11, r12
    btr [r11], rax

    ; NOTE:
    ; we maybe could even use a xmm/vector instruction to calculate
    ; multiple indexes and offsets

    ; Delete associated texture. Maybe turn this into an option
    ; because the user might not be aware that it gets deleted
    ; NOTE(review): this is %ifdef (enabled whenever the symbol is defined,
    ; even as 0) and the img access does not add RG_ANIM_SPRITE_OFFSET as
    ; RG_RenderObjects does - confirm both are intended.
%ifdef RGE_OPTIM_DEALLOC_FREE_TEXTURE
    mov areg1, [AnimSprite(areg1, img)]
    exmcall SDL_DestroyTexture
%endif
%$end:
    funcend

;; Main render step.
;; Clears the current render target, runs game_render_pre,
;; RG_RenderObjects and game_render_post, then copies the camera slice at
;; (Camera.x, Camera.y) with the window's size into the window, presents
;; it, and makes the camera texture the render target again.
;; @param areg1 - delta time (integer), passed on to the callbacks
;; @param xmm1  - delta time (float), passed on to the callbacks
RG_Render:
    funcstart
    saveregs xmm1, areg1
    %xdefine %$areg1 %$saved_%[areg1]
    var SDL_rect_size, %$srcrect, %$dstrect
    stksetup

    mov areg1, [%$areg1]
    exmcall SDL_RenderClear, [MainRenderer]

    ; areg1 / xmm1 are reloaded from the saved copies before every call
    mov areg1, [%$areg1]
    movdqa xmm1, [%$saved_xmm1]
    call game_render_pre

    mov areg1, [%$areg1]
    movdqa xmm1, [%$saved_xmm1]
    call RG_RenderObjects

    mov areg1, [%$areg1]
    movdqa xmm1, [%$saved_xmm1]
    call game_render_post

    ; switch to the window (target 0) and clear it
    exmcall SDL_SetRenderTarget, [MainRenderer], 0
    exmcall SDL_RenderClear, [MainRenderer]

    mov [SDL_rect(%$dstrect, x)], dword 0
    mov [SDL_rect(%$dstrect, y)], dword 0
    mov eax, [Camera.x]
    mov [SDL_rect(%$srcrect, x)], eax
    mov eax, [Camera.y]
    mov [SDL_rect(%$srcrect, y)], eax

    ; the size of srcrect and dstrect is the same (the window size)
    ; to render the smaller camera slice
    mov rax, [MainWindow.w]
    mov [SDL_rect(%$dstrect, w)], eax
    mov [SDL_rect(%$srcrect, w)], eax
    mov rax, [MainWindow.h]
    mov [SDL_rect(%$dstrect, h)], eax
    mov [SDL_rect(%$srcrect, h)], eax

    ; NOTE: Now we only update the Camera.x and Camera.y
    ; to move the visible slice around
    ; NOTE: If the camera x and y become negative, the texture
    ; starts stretching instead. This is likely because there is no
    ; further texture beyond these coordinates; the same thing would
    ; likely happen if the x and y are larger than the texture itself
    lea areg3, [%$srcrect]
    lea areg4, [%$dstrect]
    exmcall SDL_RenderCopy, [MainRenderer], [Camera]
    exmcall SDL_RenderPresent, [MainRenderer]
    exmcall SDL_SetRenderTarget, [MainRenderer], [Camera]
    funcend

;; Polling of events, such as keys, joystick, etc.
;; Drains the SDL event queue. SDL_QUIT triggers RG_Abort; every other
;; event is forwarded to game_event_handler with
;;   areg1 = pointer to the event struct, areg2 = event type.
RG_PollEvents:
    funcstart
    saveregs areg1, areg2
    var SDL_Event_size, %$event
    stksetup

%$event_loop:
    lea areg1, [%$event]
    exmcall SDL_PollEvent
    test eax, eax
    jz %$no_more_events

    mov eax, u32 [%$event] ; load .type into eax, reg access is faster than mem-access
    cmp eax, SDL_QUIT
    ifc z ; if it equals SDL_QUIT
        ; default close behaviour so we can close the window
        ; normally
        call RG_Abort
    endif

    ; we let the called extern code decide what to do with it (for now at least)
    lea areg1, [%$event]
    mov areg2, rax
    ; areg1 - pointer to event struct
    ; areg2 - event type
    call game_event_handler
    jmp %$event_loop

%$no_more_events:
    funcend

;; Graceful quit function: destroys the main renderer and the main window
RG_Quit:
    funcstart
    stksetup
    ; perform cleanup
    mov areg1, [MainRenderer]
    exmcall SDL_DestroyRenderer
    mov areg1, [MainWindow]
    exmcall SDL_DestroyWindow
    funcend

;; Sets the window title used by RG_Init
;; @param areg1 - pointer to the title string
RG_set_title:
    mov [Title_ptr], areg1
    ret

;; Sets the window position and size used by RG_Init
;; @param areg1 - x
;; @param areg2 - y
;; @param areg3 - width
;; @param areg4 - height
RG_set_win_dim:
    mov [MainWindow.x], areg1
    mov [MainWindow.y], areg2
    mov [MainWindow.w], areg3
    mov [MainWindow.h], areg4
    ret

;; Sets the camera/level size used by RG_Init (stored as dwords)
;; @param areg1 - width
;; @param areg2 - height
RG_set_camera_dim:
    mov [Camera.w], areg1d
    mov [Camera.h], areg2d
    ret

;; Abort, usually called on error.
;; Destroys renderer and window, clears RG.gRunning, shuts SDL down and
;; exits the process with status 1.
;; NOTE(review): unlike the other functions here there is no stksetup
;; after funcstart - confirm that is intended.
RG_Abort:
    funcstart
    call RG_Quit
    mov byte [RG.gRunning], 0
    exmcall SDL_Quit
    exmcall exit, 1 ; should call libc exit
    funcend

;; Abort after an SDL failure. Currently identical to RG_Abort.
RG_AbortWithSDLError:
    jmp RG_Abort ; TODO

; 16 ms is around 60 fps
%define TARGET_MS 16

;; Main update-render-loop.
;; Runs until RG.gRunning becomes 0. Each iteration polls events, computes
;; dt (milliseconds since the previous iteration in areg1, seconds as a
;; single-precision float in xmm1), updates, renders, and then delays so
;; that an iteration takes about TARGET_MS.
RG_MainLoop:
    ; Perfect, it even stays at 16 when idling thanks to the delay
    funcstart
    var 8, %$now, %$last, %$dt, %$float_dt
    stksetup

    mov qword [%$last], 0
    exmcall SDL_GetTicks64
    mov [%$now], rax

.loop:
    mov rax, [%$now]
    mov [%$last], rax
    exmcall SDL_GetTicks64
    mov [%$now], rax

    call RG_PollEvents

    mov areg1, [%$now]
    sub areg1, qword [%$last]
    mov [%$dt], areg1

    ; convert to single-prec. float and divide by 1000 as
    ; first floating point parameter to RG_Update
    cvtsi2ss xmm1, areg1
    mov rax, 1000
    cvtsi2ss xmm2, rax
    divss xmm1, xmm2
    xorps xmm2, xmm2
    movq [%$float_dt], xmm1
    ; areg1 and xmm1 now contain DT

    ;
    ; OPTIMIZATION OPTION
    ; perform update here if update and render are decoupled;
    ; the macro below checks for this.
    ; If it is not enabled, it first updates the whole list
    ; and then renders.
    ; If enabled, it avoids a whole loop of fetching and calculating
    ; object addresses and the call below _does not_ get assembled
%ifn RGE_OPTIM_UPDATE_DURING_RENDER
    ; areg1 still contains dt
    call RG_Update
    ; RG_Update might change the registers,
    ; so we load dt back into areg1 and xmm1
    mov areg1, [%$dt]
    movq xmm1, [%$float_dt]
%endif

    ; Now render, the macro check above is also in RG_RenderObjects
    call RG_Render

    ; get current ticks and check if target fps in ms is reached
    ; (only volatile scratch registers are used here)
    exmcall SDL_GetTicks64
    sub rax, [%$now] ; rax = ms spent in this iteration
    mov r11, TARGET_MS
    ; check if target ms has been reached
    cmp rax, r11
    jg .nodelay
    ; if it is less, delay the difference to reach target ms
    ; for a more consistent frame-update cycle
    sub r11, rax
    mov areg1, r11
    exmcall SDL_Delay
.nodelay:
    cmp byte [RG.gRunning], 0
    jnz .loop
.endloop:
    funcend

;; Main entry point for our raw-asm 2D game engine.
;; Calls game_boot, initializes the image and TTF subsystems and the
;; engine, calls game_init and then runs the main loop.
;; @return rax = 0 after the main loop ends
RG_Main:
    funcstart
    stksetup
    mprintf {"%s",10}, "Starting RGE..."

    ; Call a boot/setup function. So that we can configure
    ; our global variables.
    ; NOTE: We maybe could do this in reverse, like normally.
    ; Call RG_* functions from the main game entry point
    ; and provide this code as a static library of sorts.
    ; On the other hand, this way we let the engine do the starting
    ; and just provide our code in the relevant designated function
    ; addresses
    mcall RG_DebugPrint, "Calling game_boot..."
    call game_boot

    call RG_Subsys_Img_init
    test rax, rax
    ccall z, RG_Abort ; abort on a zero result

    call RG_Subsys_TTF_init
    test rax, rax
    ccall s, RG_Abort ; abort on a negative result

    call RG_Init
    test rax, rax
    ccall z, RG_Abort

    mov byte [RG.gRunning], 1

    mcall RG_DebugPrint, "Calling game_init ..."
    call game_init

    call RG_MainLoop

    xor rax, rax ; return 0 as success
    funcend

;; Centers camera given a point and desired center point.
;; Per axis: when the point is past the center point, the camera origin
;; becomes (point - center), but only if the window-sized slice starting
;; there still ends before the camera/level size. Otherwise the axis is
;; left unchanged.
;; Only rax and r11 are used as scratch so no argument register is touched.
;; @param x
;; @param y
;; @param center_x
;; @param center_y
RG_Camera_Center:
    funcstart
    stksetup

    ; X axis
    mov r11, areg1
    cmp r11, areg3
    ifc g
        sub r11d, areg3d ; r11d = x - center_x
        mov eax, [MainWindow.w]
        add eax, r11d ; right edge of the slice
        cmp eax, [Camera.w]
        ifc l
            mov dword [Camera.x], r11d
        endif
    endif

    ; Y axis
    mov r11, areg2
    cmp r11, areg4
    ifc g
        sub r11d, areg4d ; r11d = y - center_y
        mov eax, [MainWindow.h]
        add eax, r11d ; bottom edge of the slice
        cmp eax, [Camera.h]
        ifc l
            mov dword [Camera.y], r11d
        endif
    endif
    funcend

;; Simple printing of debugging info.
;; Prints the string through printf with the dbgfmt format when
;; __?DEBUG_FORMAT?__ is defined; otherwise it only returns.
;; @param 1 - Address of string to print
RG_DebugPrint:
%ifdef __?DEBUG_FORMAT?__
    funcstart
    saveregs areg1, areg2, rbx, areg3, areg4, areg5, areg6, r10, r11, r12, r13, r14, r15
    stksetup
    mov areg2, areg1
    lea areg1, [dbgfmt]
    exmcall printf
    funcend
%else
    ; only return, this is for release binaries
    ; that do not need verbose debugging info
    ret
%endif

;; Printing with printf in a way that does not destroy
;; any registers
;; @param 1 - Address of format string
;; @param 2 - Address of string
RG_Printf:
    funcstart
    saveregs\
        xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,\
        xmm6,xmm7,xmm8,xmm9,xmm10,xmm11,\
        xmm12,xmm13,xmm14,xmm15,\
        rax,rbx,rcx,rdx,rsi,rdi,\
        r8,r9,r10,r11,r12,r13,r14,r15
    stksetup
    exmcall printf
    funcend

;; Printing with printf a decimal number, preserving all registers.
;; NOTE(review): areg1 is overwritten with the "%d" format string, so the
;; value printed is whatever areg2 holds on entry - confirm that callers
;; pass the number in areg2.
RG_PrintDecimal:
    funcstart
    saveregs\
        xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,\
        xmm6,xmm7,xmm8,xmm9,xmm10,xmm11,\
        xmm12,xmm13,xmm14,xmm15,\
        rax,rbx,rcx,rdx,rsi,rdi,\
        r8,r9,r10,r11,r12,r13,r14,r15
    stksetup
    string2reg areg1, {"%d",10}
    exmcall printf
    funcend