From 55f9d432373c31628f5c6dcf1a887a88862686e3 Mon Sep 17 00:00:00 2001 From: Lephenixnoir Date: Wed, 28 Jul 2021 22:51:03 +0200 Subject: [PATCH] Optimize loading speed (x2.7) and game speed (+35%) Loading is measured by RTC_GetTicks(). * Initial version: 9.8s This was a regression due to using 512-byte sectors instead of 4 kiB clusters as previously. * Do BFile reads of 4 kiB: 5.2s (-47%) Feels similar to original code, I'll take this as my baseline. * Test second half of Flash first: 3.6s (-31%) By reading from FLASH_FS_HINT to FLASH_END first many OS sectors can be skipped (without missing on other sectors just in case). * Load to XRAM instead of RAM with BFile The DMA is 10% slower to XRAM than to RAM, but this benefits memcmp() because of faster memory accesses through the operand bus. No effect at this point, but ends up saving 8% after memcmp is optimized. * Optimize memcmp for sectors: 3376 ms (-8%) The optimized memcmp uses word accesses for ROM (which is fastest), and weaves loop iterations to exploit superscalar parallelism. * Search sectors most likely to contain data first: 2744 ms (-19%) File fragments almost always start on 4-kiB boundaries between FLASH_FS_HINT and FLASH_END, so these are tested first. * Index most likely sectors, improve FLASH_FS_HINT: 2096 ms (-24%) Most likely sectors are indexed by first 4 bytes and binary searched, and a slightly larger region is considered for hints. The cache hits 119/129 fragments in my case. * Use optimized memcmp for consecutive fragments: 1408 ms (-33%) I only set it for the search of the first sector in each fragment and forgot to use it where it is really needed. x) Game speed is measured roughly by the time it takes to hit a wall by walking straight after spawning in Hangar. 
* Initial value: 4.4s * Use cached ROM when loading data from the WAD: 2.9s (-35%) Cached accesses are quite detrimental for sector search, I assume because everything is aligned like crazy, but it's still a major help when reading sequential data in real-time. --- CGDOOM-minisdk/CGDOOM/Makefile | 3 +- CGDOOM-minisdk/CGDOOM/platform.h | 32 +++- README | 5 +- cgdoom/cgdoom-asm.s | 105 ++++++++++++ cgdoom/cgdoom.c | 275 +++++++++++++++++++++---------- cgdoom/os.h | 12 +- 6 files changed, 337 insertions(+), 95 deletions(-) create mode 100644 cgdoom/cgdoom-asm.s diff --git a/CGDOOM-minisdk/CGDOOM/Makefile b/CGDOOM-minisdk/CGDOOM/Makefile index 8fe4866..a436705 100644 --- a/CGDOOM-minisdk/CGDOOM/Makefile +++ b/CGDOOM-minisdk/CGDOOM/Makefile @@ -6,7 +6,8 @@ RM=rm CFLAGS=-m4a-nofpu -mb -fgcse-sm -fgcse-las -fgcse-after-reload -Isrc -O3 -fmerge-all-constants -mhitachi -fuse-linker-plugin -Wall -Wextra -Wno-sign-compare -Wno-unused-but-set-variable -Wno-unused-but-set-parameter -I../../../../include -lgcc -L../../../../lib -I./ -I../../cgdoom -D_FXCG_MINICOMPAT LDFLAGS=$(CFLAGS) -nostartfiles -T../../../../toolchain/prizm.x -Wl,-static -lfxcg -lgcc -Wl,-Map=$(PROJ_NAME).map CSOURCES=$(wildcard ../../cgdoom/*.c) -OBJECTS=$(CSOURCES:.c=.o) +ASMSOURCES=$(wildcard ../../cgdoom/*.s) +OBJECTS=$(CSOURCES:.c=.o) $(ASMSOURCES:.s=.o) PROJ_NAME=CG_Doom BIN=$(PROJ_NAME).bin ELF=$(PROJ_NAME).elf diff --git a/CGDOOM-minisdk/CGDOOM/platform.h b/CGDOOM-minisdk/CGDOOM/platform.h index 9893e86..e7d0622 100644 --- a/CGDOOM-minisdk/CGDOOM/platform.h +++ b/CGDOOM-minisdk/CGDOOM/platform.h @@ -3,26 +3,46 @@ #define PLATFORM_H //--- -// WAD file access method (enable exactly one) +// WAD file access in Flash //--- +/* File access method (define exactly one) */ + /* Use BFile (100% accurate but slows down the game quite a bit because of reads happening all the time; mostly a good reference for testing) */ // #define CGDOOM_WAD_BFILE /* Search fragments in physical ROM when loading the game, and 
copy by hand from ROM to RAM during accesses (much faster) */ #define CGDOOM_WAD_MAPPING -/* Idem, but copy copy with DMA (even faster) */ -// #define CGDOOM_WAD_MAPPING_DMA /* Settings for file mappings: traverse the whole 32-MiB Flash */ -#define FLASH_START (0xA0000000) -#define FLASH_END (0xA2000000) -/* Storage unit is a cluster of 512 bytes */ +#define FLASH_START ((const void *)0xA0000000) +#define FLASH_END ((const void *)0xA2000000) +/* Where we expect the file system to start, approximately (this region is + searched first to hit sectors more quickly) */ +#define FLASH_FS_HINT ((const void *)0xA0C00000) +/* Flash too, but cached; slower for sector searches but much faster for actual + data loads while in-game */ +#define FLASH_CACHED_START ((const void *)0x80000000) +#define FLASH_CACHED_END ((const void *)0x82000000) + +/* Storage unit is a cluster of 512 bytes; Fugue tries to use clusters of 4 kiB + (8 sectors) but in exceptional circumstances cluster alignment can be lost + (such as when sectors are dead) */ #define FLASH_PAGE_SIZE 512 #define FLASH_PAGE_SIZE_LOG2 9 #define FLASH_PAGE_COUNT ((FLASH_END-FLASH_START) / FLASH_PAGE_SIZE) +/* Size of Bfile reads; performance is good when it's at least a cluster */ +#define FLASH_BFILE_UNIT 4096 + +/* Whether to index ROM sectors most likely to have data to use in sector + searches (comment out to disable) */ +#define FLASH_INDEX +/* Index contains 4 kiB cluster from FLASH_FS_HINT to FLASH_END; fragments are + almost always 4 kiB-aligned, and only occasionally not */ +#define FLASH_INDEX_SIZE ((FLASH_END-FLASH_FS_HINT) / 4096) + //--- #include "keyboard.hpp" diff --git a/README b/README index c1e4f9f..11aaf0e 100644 --- a/README +++ b/README @@ -7,10 +7,12 @@ Credit goes to: * Lephenixnoir for the final fixes and fx-CG 50 version. 
TODO: +-> Fix level recap texture +-> Fix screen not cleared when changing resolution, add larger resolutions +-> Try and support more WADs -> Reenable LTO if possible -> Try and use more memory regions in z_zone.c -> Overclocking etc. --> Improve file mapping speed (DMA) CGDOOM used to be compiled with the mini-SDK. However, it's become quite difficult to get a copy of that. Instead, this port is built with a slightly @@ -26,5 +28,6 @@ The differences are (I might push it later): * Linker script sets 500k of RAM instead of 64k * LTO disabled (hopefully it could be reenabled later) * Syscall memcpy() (apparently broken) replaced by fxlibc memcpy() +* fxlibc qsort() is added in libc/ [1] https://github.com/Jonimoose/libfxcg/ diff --git a/cgdoom/cgdoom-asm.s b/cgdoom/cgdoom-asm.s new file mode 100644 index 0000000..0156289 --- /dev/null +++ b/cgdoom/cgdoom-asm.s @@ -0,0 +1,105 @@ +.global _CGD_sector_memcmp +.align 4 + +# A pretty fast memcmp for 512-byte sectors, with equal(0)-different(1) output +# r4: 32-aligned pointer to sector in RAM (preferably 1-cycle operand bus RAM) +# r5: 32-aligned pointer to sector in ROM +# r6: 512 (ignored; for compatibility with memcmp prototype) +# +# There are two main ideas in this code: +# +# * Read with words, since such is the affinity of the ROM. (I don't know why.) +# I tested with longwords, the performance is much worse; bytes are somewhere +# in-between, which tormented me as I wondered why the most trivial memcmp() +# with poor assembler from libfxcg was faster than my hand-written function. +# +# * Weave iterations with smart register allocation to exploit superscalar +# parallelism. We read to r0/r1 while comparing r2/r3, then vice-versa. The +# two mov.w (LS) for one comparison execute in parallel with the cmp (EX) and +# bf (BR) of the previous comparison, so overall one comparison takes 2 +# cycles (plus any extra cycles in ROM reads if the cache isn't hit or +# doesn't respond immediately, and some loop overhead). 
+# +_CGD_sector_memcmp: + # For the first 32 bytes, compare as fast as possible to exit early + # when the sectors don't match (this saves a little bit). + mov #16, r7 +1: mov.w @r5+, r0 + mov.w @r4+, r1 + cmp/eq r0, r1 + bf .fail + dt r7 + bf 1b + + mov #30, r7 + +.line: + # There is a 2-cycle delay for the RAW dependency between each mov.b + # and the corresponding use. Here the delay is honored so there are no + # cycles lost to RAW dependencies. + + mov.w @r5+, r0 + nop + + mov.w @r4+, r1 + nop + + mov.w @r5+, r2 + nop + + mov.w @r4+, r3 + cmp/eq r0, r1 + + mov.w @r5+, r0 + bf .fail + + mov.w @r4+, r1 + cmp/eq r2, r3 + + mov.w @r5+, r2 + bf .fail + + mov.w @r4+, r3 + cmp/eq r0, r1 + + mov.w @r5+, r0 + bf .fail + + mov.w @r4+, r1 + cmp/eq r2, r3 + + mov.w @r5+, r2 + bf .fail + + mov.w @r4+, r3 + cmp/eq r0, r1 + + mov.w @r5+, r0 + bf .fail + + mov.w @r4+, r1 + cmp/eq r2, r3 + + mov.w @r5+, r2 + bf .fail + + mov.w @r4+, r3 + cmp/eq r0, r1 + + # These two can run in parallel (BR/EX) + bf .fail + cmp/eq r2, r3 + + bf .fail + + dt r7 + bf .line + +.success: + rts + mov #0, r0 + +.fail: + # We don't specify an order + rts + mov #1, r0 diff --git a/cgdoom/cgdoom.c b/cgdoom/cgdoom.c index 06907df..3aebc90 100644 --- a/cgdoom/cgdoom.c +++ b/cgdoom/cgdoom.c @@ -263,23 +263,40 @@ int CGDstrnicmp (const char*s1,const char*s2,int iLen) // // The file is obviously fragmented and Yatis reverse-engineered Fugue enough // to determine that storage units are sectors of 512 bytes. While clusters of -// 4 kiB are used too, a file might not start on the first sector of a cluster, -// and some sectors might also be dysfunctional. +// 4 kiB are used in general, a file might not start on the first sector of a +// cluster, and some sectors might also simply be dead. 
+// +// Although all 65536 Flash sectors are searched when needed, several +// heuristics are used: +// * The region between FLASH_FS_HINT and FLASH_END is searched first, since +// this is roughly where the filesystem is located. +// * All sectors aligned on 4-kiB boundaries between FLASH_FS_HINT and +// FLASH_END (of which there are 4096) are indexed by their first 4 bytes and +// binary searched for matches before anything else. // // See for Flash traversal parameters. /////////////////////////////////////////////////////////////////////////////// -static uint16_t const *cgdoom_wad_path = u"\\\\fls0\\doom.wad"; - -#ifdef CGDOOM_WAD_BFILE -/* File descriptor to WAD file, used in Flash_ReadFile calls from w_wad.c. */ -static int cgdoom_wad_fd = -1; -#endif - //The whole sound doesn't fir onto the RAM. //Reading per partes is not possible as this is synchronnous player (there would be silences when reading). -//So I read each page (4KB)of the wav file and try to find it in the flash. -//Simply finding start of the file is not enough because of fragmentation. + +static uint16_t const *gWADpath = u"\\\\fls0\\doom.wad"; + +/* Fast memcmp() for 512-byte sectors. */ +int CGD_sector_memcmp(const void *fast_ram, const void *rom, size_t _512); + +/* Caching structure to read WAD files by larger chunks than Flash sectors. */ +typedef struct { + int fd; + int size, offset; + char *data; /* of size FLASH_BFILE_UNIT */ +} FileAccessCache; + +/* Index of most likely ROM sectors. */ +typedef struct { + const void *sector; + uint32_t start_bytes; +} SectorIndexInfo; //allocate 1024 items for max 1024 fragments of the file. // 640 KB should to be enough for everyone ;-) @@ -314,6 +331,9 @@ void I_Error (char *error, ...); #ifdef CGDOOM_WAD_BFILE +/* File descriptor to WAD file, used in Flash_ReadFile calls from w_wad.c. 
*/ +static int gWADfd = -1; + int FindInFlash(void **buf, int size, int readpos) { return 0; @@ -321,102 +341,168 @@ int FindInFlash(void **buf, int size, int readpos) int Flash_ReadFile(void *buf, int size, int readpos) { - return Bfile_ReadFile_OS(cgdoom_wad_fd, buf, size, readpos); + return Bfile_ReadFile_OS(gWADfd, buf, size, readpos); } -#else /* CGDOOM_WAD_MAPPING, CGDOOM_WAD_MAPPING_DMA */ +#else /* CGDOOM_WAD_MAPPING */ static FileMapping *gpWADMap = 0; +static SectorIndexInfo *gIndex = NULL; -int CreateFileMapping(const unsigned short *pFileName,FileMapping *pMap) +/* Read next sector from file, while caching into a buffer. */ +const void *ReadNextSector(FileAccessCache *fc, int *size) { - int iResult = 0; - char cBuffer[FLASH_PAGE_SIZE]; - int hFile = Bfile_OpenFile_OS(pFileName,0,0); - int iLength; - char *pFlashFS = (char *)FLASH_START; + if(fc->size == 0) + { + fc->size = Bfile_ReadFile_OS(fc->fd, fc->data, FLASH_BFILE_UNIT, -1); + fc->offset = 0; + } + if(fc->size <= 0) + { + *size = -1; + return NULL; + } + + *size = min(fc->size, FLASH_PAGE_SIZE); + fc->size -= *size; + const void *sector = fc->data + fc->offset; + fc->offset += *size; + return sector; +} + +/* Compare two sectors in ROM for the index. */ +int IndexCompareSectors(const void *p1, const void *p2) +{ + const SectorIndexInfo *i1 = p1; + const SectorIndexInfo *i2 = p2; + return i1->start_bytes - i2->start_bytes; +} + +/* Find all matching sectors in index (returns in-out interval). 
*/ +void IndexSearchSector(SectorIndexInfo *index, const void *buf, int *lo_ptr, int *hi_ptr) +{ + uint32_t needle = *(const uint32_t *)buf; + *lo_ptr = *hi_ptr = -1; + + /* Find the first occurrence, set it in *lo_ptr */ + int lo=0, hi=FLASH_INDEX_SIZE; + + while(lo < hi) { + int m = (lo + hi) / 2; + int diff = index[m].start_bytes - needle; + + if(diff < 0) lo = m + 1; + else hi = m; + } + + if(lo >= FLASH_INDEX_SIZE || index[lo].start_bytes != needle) return; + *lo_ptr = hi = lo; + + /* Store last occurrence in *hi_ptr */ + do hi++; + while(hi < FLASH_INDEX_SIZE && index[hi].start_bytes == needle); + + *hi_ptr = hi; +} + +static int index_hits = 0; +static int sector_searches = 0; + +/* Find a flash sector which contains the same data as buf. */ +int FindSectorInFlash(const void *buf, int size) +{ + typeof(&memcmp) memcmp_fun = &memcmp; + if(size == FLASH_PAGE_SIZE) memcmp_fun = &CGD_sector_memcmp; + + sector_searches++; + +#ifdef FLASH_INDEX + /* If an index has been built, search in it */ + int lo, hi; + IndexSearchSector(gIndex, buf, &lo, &hi); + for(int i = lo; i < hi; i++) { + if(!memcmp_fun(buf, gIndex[i].sector, size)) { + index_hits++; + return (gIndex[i].sector - FLASH_START) / FLASH_PAGE_SIZE; + } + } +#endif + + const void *sector = FLASH_FS_HINT; + do { + if(!memcmp_fun(buf, sector, size)) + return (sector - FLASH_START) / FLASH_PAGE_SIZE; + + sector += FLASH_PAGE_SIZE; + if(sector == FLASH_END) + sector = FLASH_START; + } + while(sector != FLASH_FS_HINT); + return -1; +} + +int CreateFileMapping(int fd, FileMapping *pMap) +{ + /* Cache accesses through a larger buffer */ + FileAccessCache fc = { + .data = (void *)0xe5007000, /* XRAM */ + .fd = fd + }; + int iLength = 0; pMap->miItemCount = 0; pMap->miTotalLength = 0; - iLength = Bfile_ReadFile_OS(hFile,cBuffer,FLASH_PAGE_SIZE,-1); + + const void *pFileData = ReadNextSector(&fc, &iLength); while(iLength > 0) { - //do not optimize (= do not move these 2 variables before loop)! 
- // fx-cg allocates pages for file in order so page from the end of the file - //can have lower index than page from the beginning - const char *pTgt = pFlashFS; - int iPageIndx = 0; + int iSectorID = FindSectorInFlash(pFileData, iLength); + if(iSectorID == -1) + return -2; // Page not found! - for(;iPageIndx < FLASH_PAGE_COUNT;iPageIndx++) - { - if(!memcmp(pTgt,cBuffer,iLength)) - { - break; - } - pTgt += FLASH_PAGE_SIZE; - } - if(iPageIndx == FLASH_PAGE_COUNT) - { - //page not found ! - iResult = -2; - goto lbExit; - } - pMap->miItemCount ++; + pMap->miItemCount++; if(pMap->miItemCount >= MAX_FRAGMENTS) - { - //file too fragmented ! - iResult = -3; - goto lbExit; - } - pMap->mTable[pMap->miItemCount-1].msOffset = (unsigned short)iPageIndx; + return -3; // File too fragmented! + + pMap->mTable[pMap->miItemCount-1].msOffset = iSectorID; pMap->mTable[pMap->miItemCount-1].msCount = 0; - //assume fragment has more pages + + /* Look for consecutive sectors in the same fragment */ + const void *pFragment = FLASH_START + (iSectorID * FLASH_PAGE_SIZE); for(;;) { pMap->mTable[pMap->miItemCount-1].msCount++; pMap->miTotalLength += iLength; - iPageIndx++; - pTgt += FLASH_PAGE_SIZE; + iSectorID++; + pFragment += FLASH_PAGE_SIZE; if(iLength < FLASH_PAGE_SIZE) { //this was the last page - iResult = pMap->miTotalLength; - goto lbExit; + return pMap->miTotalLength; } - iLength = Bfile_ReadFile_OS(hFile,cBuffer,FLASH_PAGE_SIZE,-1); + pFileData = ReadNextSector(&fc, &iLength); if(iLength <= 0) - { break; - } - if(memcmp(pTgt,cBuffer,iLength)) - { - break; - } + if((iLength == FLASH_PAGE_SIZE) + ? 
CGD_sector_memcmp(pFileData, pFragment, iLength) + : memcmp(pFileData, pFragment, iLength)) + break; } } if(iLength < 0) - { - iResult = -1; - } - else - { - if(pMap->miTotalLength >50000) - { - pMap->miTotalLength = 50000;//hack - } + return -1; - iResult = pMap->miTotalLength; + if(pMap->miTotalLength >50000) + { + pMap->miTotalLength = 50000;//hack } - -lbExit: - Bfile_CloseFile_OS(hFile); - return iResult; + return pMap->miTotalLength; } -int FindInFlash(void **buf, int size, int readpos) +int FindInFlash(const void **buf, int size, int readpos) { - int iPageReq = readpos >>FLASH_PAGE_SIZE_LOG2; + int iPageReq = readpos / FLASH_PAGE_SIZE; int iPageIndx = 0; int iCurrOffset = 0, iCurrLen; int iSubOffset; @@ -434,7 +520,7 @@ int FindInFlash(void **buf, int size, int readpos) break; } iPageReq -= gpWADMap->mTable[iPageIndx].msCount; - iCurrOffset += ((int)gpWADMap->mTable[iPageIndx].msCount) << FLASH_PAGE_SIZE_LOG2; + iCurrOffset += ((int)gpWADMap->mTable[iPageIndx].msCount) * FLASH_PAGE_SIZE; iPageIndx++; } iSubOffset = readpos - iCurrOffset; @@ -444,13 +530,13 @@ int FindInFlash(void **buf, int size, int readpos) { iCurrLen = size; } - *buf = ((char *)FLASH_START)+(gpWADMap->mTable[iPageIndx].msOffset << FLASH_PAGE_SIZE_LOG2)+iSubOffset; + *buf = FLASH_CACHED_START + (gpWADMap->mTable[iPageIndx].msOffset * FLASH_PAGE_SIZE) + iSubOffset; return iCurrLen; } int Flash_ReadFile(void *buf, int size, int readpos) { - void *pSrc; + const void *pSrc; int iRet = 0; while(size >0) { @@ -479,7 +565,7 @@ void abort(void){ } /////////////////////////////////////////////////////////////////////////////////////////////////// int main(void){ - InitFlashSimu(cgdoom_wad_path); //load wad file to flash simulation on simulator, do nothing on real HW + InitFlashSimu(gWADpath); //load wad file to flash simulation on simulator, do nothing on real HW #ifdef CG_EMULATOR SaveVRAMBuffer = aSaveVRAMBuffer; SystemStack = aSystemStack; @@ -495,12 +581,29 @@ int main(void){ /* Setup access to 
WAD file */ #ifdef CGDOOM_WAD_BFILE - cgdoom_wad_fd = Bfile_OpenFile_OS(cgdoom_wad_path, 0, 0); + gWADfd = Bfile_OpenFile_OS(gWADpath, 0, 0); #else int time_start = RTC_GetTicks(); + + #ifdef FLASH_INDEX + /* Index most likely flash sectors into a sorted array, so that sectors + can be hit quickly. The index contains every sector on a 4-kiB + boundary (where fragments are most likely to start) between + FLASH_FS_HINT and FLASH_END. */ + gIndex = (void *)SystemStack; + for(int i = 0; i < FLASH_INDEX_SIZE; i++) { + SectorIndexInfo *info = &gIndex[i]; + info->sector = FLASH_FS_HINT + (i * 4096); + info->start_bytes = *(const uint32_t *)info->sector; + } + qsort(gIndex, FLASH_INDEX_SIZE, sizeof *gIndex, IndexCompareSectors); + #endif + gpWADMap = (FileMapping *)(SaveVRAMBuffer + 2*65536); ASSERT(2*65536 + sizeof(FileMapping) < SAVE_VRAM_SIZE); - int size = CreateFileMapping(cgdoom_wad_path,gpWADMap); + int fd = Bfile_OpenFile_OS(gWADpath,0,0); + int size = CreateFileMapping(fd, gpWADMap); + Bfile_CloseFile_OS(fd); int time_end = RTC_GetTicks(); if(size == -1) { @@ -515,17 +618,19 @@ int main(void){ I_Error ("File too fragmented"); return 1; } - else if(size < 0) { - I_ErrorI ("CreateFileMapping", size, 0, 0, 0); - return 1; - } else { int key; char line[22]; int time_ms = (time_end - time_start) * 8; - CGDAppendNum0_999("mmap (ms): ", time_ms, 1, line); Bdisp_AllClr_VRAM(); locate_OS(1, 1); + CGDAppendNum0_999("mmap (ms): ", time_ms, 1, line); + PrintLine(line, 21); + locate_OS(1, 2); + CGDAppendNum0_999("Searches: ", sector_searches, 1, line); + PrintLine(line, 21); + locate_OS(1, 3); + CGDAppendNum0_999("Index hits: ", index_hits, 1, line); PrintLine(line, 21); Bdisp_PutDisp_DD(); GetKey(&key); diff --git a/cgdoom/os.h b/cgdoom/os.h index 002944e..e9b1a4f 100644 --- a/cgdoom/os.h +++ b/cgdoom/os.h @@ -43,9 +43,17 @@ void I_ErrorI(const char *str, int i1, int i2, int i3, int i4); #define sprintf 212 //return ptr to flash -int FindInFlash(void **buf, int size, int 
readpos); +int FindInFlash(const void **buf, int size, int readpos); //direct read from flash int Flash_ReadFile(void *buf, int size, int readpos); //CGD: bypass for direct pointers to flash -#define PTR_TO_FLASH(x) (((int)x < FLASH_END) && ((int)x >= FLASH_START)) +#define PTR_TO_FLASH(x) ( \ + ((x) >= FLASH_START && (x) < FLASH_END) || \ + ((x) >= FLASH_CACHED_START && (x) < FLASH_CACHED_END)) + +#define min(x,y) ({ \ + __auto_type __x = (x); \ + __auto_type __y = (y); \ + __x < __y ? __x : __y; \ +})