From 22fd7477386765ca4ead56ab8440523f51bb4c1c Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 23 Mar 2026 13:51:33 -0700 Subject: [PATCH 1/7] modified: src/benchmarks/sortBenchmark.h --- src/benchmarks/sortBenchmark.h | 33 +++++++++++++----------- src/desktopMain.c | 5 ++-- src/query-interface/sort/adaptive_sort.c | 6 ++++- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/benchmarks/sortBenchmark.h b/src/benchmarks/sortBenchmark.h index 174c4cd7..fa7c278f 100644 --- a/src/benchmarks/sortBenchmark.h +++ b/src/benchmarks/sortBenchmark.h @@ -67,7 +67,7 @@ int sortQueryBenchmark() { stateUWA->eraseSizeInPages = 4; stateUWA->numDataPages = 20000; stateUWA->numIndexPages = 1000; - stateUWA->numSplinePoints = 30; + stateUWA->numSplinePoints = 120; if (STORAGE_TYPE == 1) { printf("Dataflash is not currently supported. Defaulting to SD card interface."); @@ -92,6 +92,9 @@ int sortQueryBenchmark() { return -1; } + stateUWA->rules = NULL; + stateUWA->numRules = 0; + int8_t colSizes[] = {4, 4, 4, 4}; int8_t colSignedness[] = {embedDB_COLUMN_UNSIGNED, embedDB_COLUMN_SIGNED, embedDB_COLUMN_SIGNED, embedDB_COLUMN_SIGNED}; ColumnType colTypes[] = {embedDB_COLUMN_UINT32, embedDB_COLUMN_INT32, embedDB_COLUMN_INT32, embedDB_COLUMN_INT32}; @@ -121,22 +124,22 @@ int sortQueryBenchmark() { } } - printf("\nSort followed by Projection\n"); - // Perform sort runs with different numbers of values - for (int i = 0; i < 5; i++) { - printf("%d values:\n", num_values[i]); + // printf("\nSort followed by Projection\n"); + // // Perform sort runs with different numbers of values + // for (int i = 0; i < 5; i++) { + // printf("%d values:\n", num_values[i]); - // Repeat each run for consistency - for (int j = 0; j < 1; j++) { - clock_gettime(CLOCK_MONOTONIC, &start_time); + // // Repeat each run for consistency + // for (int j = 0; j < 1; j++) { + // clock_gettime(CLOCK_MONOTONIC, &start_time); - sort_order_first(num_values[i], stateUWA, baseSchema); + // sort_order_first(num_values[i], stateUWA, baseSchema); - clock_gettime(CLOCK_MONOTONIC, &end_time); - double elapsed_ms = time_diff_ms(start_time, end_time); - printf("\tElapsed time: %.3f ms\n", elapsed_ms); - } - } + // clock_gettime(CLOCK_MONOTONIC, &end_time); + // double elapsed_ms = time_diff_ms(start_time, end_time); + // printf("\tElapsed time: %.3f ms\n", elapsed_ms); + // } + // } // Close embedDB embedDBClose(stateUWA); @@ -159,7 +162,7 @@ void sort_order_last(int32_t numValues, embedDBState* stateUWA, embedDBSchema* b embedDBInitIterator(stateUWA, &it); embedDBOperator* scanOpOrderBy = createTableScanOperator(stateUWA, &it, baseSchema); - uint8_t projColsOB[] = {0, 1}; + uint8_t projColsOB[] = {0, 3}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); embedDBOperator* orderByOp = createOrderByOperator(stateUWA, projColsOrderBy, 1, numValues, merge_sort_int32_comparator); orderByOp->init(orderByOp); diff --git a/src/desktopMain.c b/src/desktopMain.c index 793f53e2..9915904b 100644 --- a/src/desktopMain.c +++ b/src/desktopMain.c @@ -6,7 +6,7 @@ * */ #ifndef WHICH_PROGRAM -#define WHICH_PROGRAM 0 +#define WHICH_PROGRAM 4 #endif #if WHICH_PROGRAM == 0 @@ -18,8 +18,9 @@ #elif WHICH_PROGRAM == 3 #include "benchmarks/queryInterfaceBenchmark.h" #elif WHICH_PROGRAM == 4 -#include "benchmarks/activeRulesBenchmark.h" #include "benchmarks/sortBenchmark.h" +#elif WHICH_PROGRAM == 5 +#include "benchmarks/activeRulesBenchmark.h" #endif int main() { diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index 07f9e036..a1b1567f 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -686,6 +686,7 @@ int adaptive_sort( // No merge phase necessary if (numSublist == 1) { + printf("done after replacement\n"); ((file_iterator_state_t*)iteratorState)->fileInterface->flush(outputFile); *resultFilePtr = 0; return 0; @@ -719,6 +720,7 @@ int adaptive_sort( // Make decision to use either no output buffer sort or MinSort if (avgDistinct / 10 < nobSortCost) { + printf("min\n"); /* */ /* MinSort */ /* */ @@ -742,7 +744,9 @@ int adaptive_sort( *resultFilePtr = lastWritePos; flash_minsort(iteratorState, tupleBuffer, outputFile, buffer, bufferSizeBytes, es, resultFilePtr, metric, compareFn); } - } else { + } + else { + printf("nob\n"); /* */ /* No Output Buffer Sort Merge */ /* */ From 4e455144c1ab66c0ddf1f14877ab36561fc70476 Mon Sep 17 00:00:00 2001 From: xelArga Date: Mon, 23 Mar 2026 14:15:09 -0700 Subject: [PATCH 2/7] fixed tmp file posix naming issue and added unistd for close system call. --- lib/Desktop-File-Interface/desktopFileInterface.c | 7 ++++--- src/benchmarks/sortBenchmark.h | 6 ++++++ 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/Desktop-File-Interface/desktopFileInterface.c b/lib/Desktop-File-Interface/desktopFileInterface.c index 8fb4a7cc..ee6839e6 100644 --- a/lib/Desktop-File-Interface/desktopFileInterface.c +++ b/lib/Desktop-File-Interface/desktopFileInterface.c @@ -159,12 +159,13 @@ char *tempFilePath(void) { #else /* POSIX systems */ - snprintf(tempPathBuffer, sizeof(tempPathBuffer), - "/tmp/embeddb_%luXXXXXX", (unsigned long)rand()); + snprintf(tempPathBuffer, sizeof(tempPathBuffer), "/tmp/embeddb_XXXXXX"); int fd = mkstemp(tempPathBuffer); if (fd >= 0) { - close(fd); + close(fd); // Now unistd.h will make this work! + } else { + perror("mkstemp failed"); } #endif diff --git a/src/benchmarks/sortBenchmark.h b/src/benchmarks/sortBenchmark.h index fa7c278f..da67be4d 100644 --- a/src/benchmarks/sortBenchmark.h +++ b/src/benchmarks/sortBenchmark.h @@ -9,6 +9,12 @@ #include "query-interface/advancedQueries.h" #include "query-interface/sort/in_memory_sort.h" +#if defined(_WIN32) || defined(_WIN64) +#include +#else +#include +#endif + /** * 0 = SD Card * 1 = Dataflash From d2c7b4c1522ff04a4a7b2fbe8c207c93418639b4 Mon Sep 17 00:00:00 2001 From: xelArga Date: Mon, 23 Mar 2026 14:22:49 -0700 Subject: [PATCH 3/7] removed the call from the benchmark to the file interface --- lib/Desktop-File-Interface/desktopFileInterface.c | 8 +++++++- src/benchmarks/sortBenchmark.h | 6 ------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/Desktop-File-Interface/desktopFileInterface.c b/lib/Desktop-File-Interface/desktopFileInterface.c index ee6839e6..ebefa640 100644 --- a/lib/Desktop-File-Interface/desktopFileInterface.c +++ b/lib/Desktop-File-Interface/desktopFileInterface.c @@ -1,3 +1,9 @@ +#if defined(_WIN32) || defined(_WIN64) +#include +#else +#include +#endif + #include "desktopFileInterface.h" typedef struct { @@ -163,7 +169,7 @@ char *tempFilePath(void) { int fd = mkstemp(tempPathBuffer); if (fd >= 0) { - close(fd); // Now unistd.h will make this work! + close(fd); } else { perror("mkstemp failed"); } diff --git a/src/benchmarks/sortBenchmark.h b/src/benchmarks/sortBenchmark.h index da67be4d..fa7c278f 100644 --- a/src/benchmarks/sortBenchmark.h +++ b/src/benchmarks/sortBenchmark.h @@ -9,12 +9,6 @@ #include "query-interface/advancedQueries.h" #include "query-interface/sort/in_memory_sort.h" -#if defined(_WIN32) || defined(_WIN64) -#include -#else -#include -#endif - /** * 0 = SD Card * 1 = Dataflash From ace4298b1c5850daded2ea1480339515c6c52fbd Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 27 Apr 2026 12:32:35 -0700 Subject: [PATCH 4/7] new update for fixes to adaptive sort and the platform io for distribution testing --- platformio.ini | 4 + src/benchmarks/sortBenchmark.h | 63 +++++++++---- src/desktopMain.c | 2 +- src/dueMain.cpp | 4 + src/query-interface/activeRules.c | 8 +- src/query-interface/sort/adaptive_sort.c | 88 +++++++------------ src/query-interface/sort/flash_minsort.c | 8 +- .../sort/flash_minsort_sublist.c | 9 +- src/query-interface/sort/no_output_heap.c | 2 +- src/query-interface/sort/sortWrapper.c | 4 +- .../test_in_memory_quick_sort.cpp | 0 test/test_sort/test_sort_query_interface.cpp | 88 +++++++++++++++++-- 12 files changed, 186 insertions(+), 94 deletions(-) rename test/{test_in_memort_quick_sort => test_in_memory_quick_sort}/test_in_memory_quick_sort.cpp (100%) diff --git a/platformio.ini b/platformio.ini index 44e7ffd7..b43338e4 100644 --- a/platformio.ini +++ b/platformio.ini @@ -7,10 +7,13 @@ ; ; Please visit documentation for the other options and examples ; https://docs.platformio.org/page/projectconf.html +[platformio] +test_transport = custom [env:desktop] platform = native test_build_src = true +test_framework = unity build_src_filter = +<**/*.c> +<**/*.cpp> @@ -28,6 +31,7 @@ build_type = debug [env:desktop-dist] platform = native test_build_src = true +test_framework = unity build_src_filter = +<**/*.c> +<**/*.cpp> diff --git a/src/benchmarks/sortBenchmark.h b/src/benchmarks/sortBenchmark.h index fa7c278f..7ba994c9 100644 --- a/src/benchmarks/sortBenchmark.h +++ b/src/benchmarks/sortBenchmark.h @@ -48,6 +48,19 @@ #endif void insertData(embedDBState* state, const char* filename); +void insertNValues(embedDBState* state, int32_t n) { + int32_t key = 0, value = 0; + + for (int32_t i = 0; i < n; i++) { + if (i % 10 == 0) { + value = 0; + } + embedDBPut(state, &key, &value); + key++; + value++; + + } +} void sort_order_last(int32_t numValues, embedDBState* stateUWA, embedDBSchema* baseSchema); void sort_order_first(int32_t numValues, embedDBState* stateUWA, embedDBSchema* baseSchema); @@ -102,9 +115,14 @@ int sortQueryBenchmark() { // Insert data const char datafileName[] = "data/uwa500K.bin"; - insertData(stateUWA, datafileName); + //insertData(stateUWA, datafileName); + insertNValues(stateUWA, 500000); +#ifdef ARDUINO + uint32_t start_time, end_time; +#else struct timespec start_time, end_time; +#endif int32_t num_values[] = {100, 1000, 10000, 100000, 500000}; printf("\nProjection followed by Sort\n"); @@ -114,32 +132,41 @@ int sortQueryBenchmark() { // Repeat each run for consistency for (int j = 0; j < 1; j++) { +#ifdef ARDUINO + start_time = clock(); +#else clock_gettime(CLOCK_MONOTONIC, &start_time); +#endif sort_order_last(num_values[i], stateUWA, baseSchema); - +#ifdef ARDUINO + end_time = clock(); + uint32_t elapsed_time = end_time - start_time; + printf("\tElapsed time: %u ms\n", elapsed_time); +#else clock_gettime(CLOCK_MONOTONIC, &end_time); - double elapsed_ms = time_diff_ms(start_time, end_time); - printf("\tElapsed time: %.3f ms\n", elapsed_ms); + double elapsed_time = time_diff_ms(start_time, end_time); + printf("\tElapsed time: %.3f ms\n", elapsed_time); +#endif } } - // printf("\nSort followed by Projection\n"); - // // Perform sort runs with different numbers of values - // for (int i = 0; i < 5; i++) { - // printf("%d values:\n", num_values[i]); + printf("\nSort followed by Projection\n"); + // Perform sort runs with different numbers of values + for (int i = 0; i < 5; i++) { + printf("%d values:\n", num_values[i]); - // // Repeat each run for consistency - // for (int j = 0; j < 1; j++) { - // clock_gettime(CLOCK_MONOTONIC, &start_time); + // Repeat each run for consistency + for (int j = 0; j < 1; j++) { + clock_gettime(CLOCK_MONOTONIC, &start_time); - // sort_order_first(num_values[i], stateUWA, baseSchema); + sort_order_first(num_values[i], stateUWA, baseSchema); - // clock_gettime(CLOCK_MONOTONIC, &end_time); - // double elapsed_ms = time_diff_ms(start_time, end_time); - // printf("\tElapsed time: %.3f ms\n", elapsed_ms); - // } - // } + clock_gettime(CLOCK_MONOTONIC, &end_time); + double elapsed_ms = time_diff_ms(start_time, end_time); + printf("\tElapsed time: %.3f ms\n", elapsed_ms); + } + } // Close embedDB embedDBClose(stateUWA); @@ -162,7 +189,7 @@ void sort_order_last(int32_t numValues, embedDBState* stateUWA, embedDBSchema* b embedDBInitIterator(stateUWA, &it); embedDBOperator* scanOpOrderBy = createTableScanOperator(stateUWA, &it, baseSchema); - uint8_t projColsOB[] = {0, 3}; + uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); embedDBOperator* orderByOp = createOrderByOperator(stateUWA, projColsOrderBy, 1, numValues, merge_sort_int32_comparator); orderByOp->init(orderByOp); diff --git a/src/desktopMain.c b/src/desktopMain.c index 9915904b..d4ae86ad 100644 --- a/src/desktopMain.c +++ b/src/desktopMain.c @@ -6,7 +6,7 @@ * */ #ifndef WHICH_PROGRAM -#define WHICH_PROGRAM 4 +#define WHICH_PROGRAM 0 #endif #if WHICH_PROGRAM == 0 diff --git a/src/dueMain.cpp b/src/dueMain.cpp index 57f50e63..82d7312e 100644 --- a/src/dueMain.cpp +++ b/src/dueMain.cpp @@ -69,6 +69,8 @@ static ArduinoOutStream cout(Serial); #include "benchmarks/variableDataBenchmark.h" #elif WHICH_PROGRAM == 3 #include "benchmarks/queryInterfaceBenchmark.h" +#elif WHICH_PROGRAM == 4 +#include "benchmarks/sortBenchmark.h" #endif #define ENABLE_DEDICATED_SPI 1 @@ -109,6 +111,8 @@ void setup() { test_vardata(); #elif WHICH_PROGRAM == 3 advancedQueryExample(); +#elif WHICH_PROGRAM == 4 + sortQueryBenchmark(); #endif } diff --git a/src/query-interface/activeRules.c b/src/query-interface/activeRules.c index 8fb1ba16..b253cda5 100644 --- a/src/query-interface/activeRules.c +++ b/src/query-interface/activeRules.c @@ -70,7 +70,7 @@ void executeRules(embedDBState* state, void* key, void* data) { case GET_CUSTOM: handleCustomQuery(state, state->rules[i], key, data); break; - default: + default:; #ifdef PRINT_ERRORS printf("ERROR: Unsupported rule type\n"); #endif @@ -177,7 +177,7 @@ embedDBOperator* createOperator(embedDBState* state, activeRule* rule, void*** a case GET_MIN: aggFunc = createMinAggregate(rule->colNum, rule->schema->columnSizes[rule->colNum]); break; - default: + default:; #ifdef PRINT_ERRORS printf("ERROR: Unsupported rule type\n"); #endif @@ -223,7 +223,7 @@ void executeComparison(activeRule* rule, void* aggregateValue, Comparator compar case NotEqual: if (comparisonResult != 0) rule->callback(aggregateValue, data, rule->context); break; - default: + default:; #ifdef PRINT_ERRORS printf("ERROR: Unsupported operation\n"); #endif @@ -266,7 +266,7 @@ void handleCustomQuery(embedDBState* state, activeRule* rule, void* key, void* d case DBDOUBLE: executeComparison(rule, result, doubleComparator, data); break; - default: + default:; #ifdef PRINT_ERRORS printf("ERROR: Unsupported return type\n"); #endif diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index a1b1567f..9d7e30e8 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -52,10 +52,10 @@ // #define DEBUG 1 // #define DEBUG_OUTPUT 1 -// #define DEBUG_READ 1 +// #define DEBUG_READ 1 // #define DEBUG_HEAP 0 // #define ADAPTIVE_SORT_PRINT -// #define ADAPTIVE_SORT_PRINT_FINISH +// #define ADAPTIVE_SORT_PRINT_FINISH #if defined(DEBUG) || defined(DEBUG_OUTPUT) || defined(DEBUG_READ) || defined(DEBUG_HEAP) || defined(ADAPTIVE_SORT_PRINT) || defined(ADAPTIVE_SORT_PRINT_FINISH) #include "debug_print.h" #else @@ -141,14 +141,16 @@ int adaptive_sort( int16_t i, status; int32_t numSublist = 0; void* addr; + int8_t startNewSublist = 0; int32_t numShiftOutOutput = 0, numShiftIntoOutput = 0, numShiftOtherBlock = 0; + int16_t cpuPenalty = 3; /* Distribution estimation variables */ int16_t avgDistinct = 0; /* Average # of distinct values per run. Multiplied by 10 so can do integer rather than float operations. */ /* Note: Could be int8_t as larger than 255 is above cutoff for using MinSort. */ - uint8_t numDistinctInRun = 0; /* Number of distinct values in current run */ + uint16_t numDistinctInRun = 0; /* Number of distinct values in current run */ - int optimistic = false; + int optimistic = true; if (optimistic) { // Do FLASH MinSort init first #ifdef DEBUG @@ -165,9 +167,9 @@ int adaptive_sort( avgDistinct = 16; int16_t numPasses = (int)ceil(log(es->num_pages / bufferSizeInBlocks) / log(bufferSizeInBlocks)); - int32_t nobSortCost = numPasses * (10 + writeToReadRatio) / 10; + int32_t nobSortCost = (numPasses * (10 + writeToReadRatio) + 9) / 10; -#ifdef DEBUG +#ifdef ADAPTIVE_SORT_PRINT debug_log("Adaptive calculation.\n"); debug_log("NOB sort cost. # runs: %d", numSublist); debug_log(" # passes: %d cost: %d\n", numPasses, nobSortCost); @@ -175,7 +177,7 @@ int adaptive_sort( debug_log(" Avg. distinct/sublist: %d\n", avgDistinct / 10); #endif - if (avgDistinct < nobSortCost) + if (avgDistinct * cpuPenalty < nobSortCost) // if (true) { #ifdef DEBUG @@ -368,6 +370,12 @@ int adaptive_sort( sublistSize, outputCount, heapSize, listSize, recordsLeft); #endif + if (startNewSublist) { + outputCount = 0; + haveOutputKey = 0; + sublistSize = 0; + startNewSublist = 0; + } // Read in page addr = buffer + es->headerSize; for (i = 0; i < tuplesPerPage; i++) { @@ -430,41 +438,6 @@ int adaptive_sort( // Swap output records into output buffer from heap if smaller than records currently there. (I/O block is id zero) for (i = 0; i < tuplesPerPage; i++) { - /* - * HEAP-EMPTY START NEW RUN TRANSITION - */ - if (heapSize == 0) { - if (listSize > 0) { - // Finish current run and start a new one - numSublist++; - metric->num_runs++; - - sublistSize = 0; - outputCount = 0; - haveOutputKey = 0; - -#ifdef DEBUG - debug_log("DEBUG: Heap empty → starting new run, promoting list (%d records)\n", - listSize); -#endif - - // Promote frozen list to heap - for (int32_t k = listSize - 1; k >= 0; k--) { - shiftUp_rev(buffer + heapStartOffset, - buffer + es->page_size + k * es->record_size, - heapSize, es, metric); - heapSize++; - } - listSize = 0; - - // Restart filling the output page for the new run - i = -1; - continue; - } else { - // No heap, no list → nothing left to output - break; - } - } #ifdef DEBUG if (i < 3 || i == tuplesPerPage - 1) { // Only log first 3 and last iteration debug_log(" Inner loop i=%d: recordsRead=%d, outputCount=%d, recordsLeft=%d, heapSize=%d\n", @@ -473,6 +446,16 @@ int adaptive_sort( #endif // Check if we've read all records from the current page if (recordsRead == 0 || i >= recordsRead) { + heapVal = buffer + heapStartOffset; + if (haveOutputKey && heapSize > 0 && + es->compare_fcn(heapVal + es->key_offset, + lastOutputKey + es->key_offset) < 0) { + numSublist++; + metric->num_runs++; + numDistinctInRun = 1; + startNewSublist = 1; + break; + } // Check if there are any records left if (recordsLeft <= 0) { break; @@ -540,7 +523,7 @@ int adaptive_sort( memcpy(buffer + es->headerSize + i * es->record_size, buffer + heapStartOffset, es->record_size); /* Heap into input/output block */ metric->num_memcpys += 2; // Determine if the value is different than the last one to estimate the number of distinct values - if (numDistinctInRun < 255 && haveOutputKey) { + if (haveOutputKey) { // Value is different metric->num_compar++; if (es->compare_fcn(lastOutputKey + es->key_offset, inputVal + es->key_offset) < 0) @@ -659,7 +642,6 @@ int adaptive_sort( #endif numDistinctInRun = 0; } /* end pessmistic */ - #ifdef DEBUG debug_log("\n=== REPLACEMENT SELECTION COMPLETE ===\n"); debug_log("Number of sublists created: %d\n", numSublist); @@ -674,8 +656,8 @@ int adaptive_sort( uint32_t blockIdx = *((uint32_t*)buffer); uint16_t count = *((uint16_t*)(buffer + BLOCK_COUNT_OFFSET)); - debug_log("Block %d: blockIdx=%u, count=%u, first 10 values:", debugBlock, blockIdx, count); - for (int v = 0; v < count && v < 10; v++) { + debug_log("Block %d: blockIdx=%u, count=%u,", debugBlock, blockIdx, count); + for (int v = 0; v < count && v < 63; v++) { debug_log(" %d", *(int32_t*)(buffer + es->headerSize + v * es->record_size + es->key_offset)); } if (count > 10) debug_log(" ..."); @@ -686,7 +668,6 @@ int adaptive_sort( // No merge phase necessary if (numSublist == 1) { - printf("done after replacement\n"); ((file_iterator_state_t*)iteratorState)->fileInterface->flush(outputFile); *resultFilePtr = 0; return 0; @@ -708,6 +689,7 @@ int adaptive_sort( int16_t numPasses = (int)ceil(log(numSublist) / log(bufferSizeInBlocks)); int32_t nobSortCost = numPasses * (10 + writeToReadRatio) / 10; + int32_t minSortCost = avgDistinct / 10 * cpuPenalty; #ifdef ADAPTIVE_SORT_PRINT debug_log("Adaptive calculation.\n"); @@ -715,12 +697,12 @@ int adaptive_sort( debug_log(" # passes: %d cost: %d\n", numPasses, nobSortCost); debug_log("MinSort cost. Num sublists: %d ", numSublist); debug_log(" Avg. distinct/sublist: %d\n", avgDistinct / 10); - + debug_log("Sublist version possible: %d\n", sublistVersionPossible); + debug_log("Buffer size bytes: %d Sort key size: %d\n", bufferSizeBytes, SORT_KEY_SIZE); #endif // Make decision to use either no output buffer sort or MinSort - if (avgDistinct / 10 < nobSortCost) { - printf("min\n"); + if (minSortCost < nobSortCost) { /* */ /* MinSort */ /* */ @@ -744,9 +726,7 @@ int adaptive_sort( *resultFilePtr = lastWritePos; flash_minsort(iteratorState, tupleBuffer, outputFile, buffer, bufferSizeBytes, es, resultFilePtr, metric, compareFn); } - } - else { - printf("nob\n"); + } else { /* */ /* No Output Buffer Sort Merge */ /* */ @@ -1483,7 +1463,7 @@ int adaptive_sort( /* end of run */ } - if (record2[0] != -1) { /* Tuples in output block to write out */ + if (record2[0] > 0) { /* Tuples in output block to write out */ // fseek(outputFile, lastWritePos, SEEK_SET); // if (0 == fwrite(buffer + OUTPUT_BLOCK_ID * es->page_size, (size_t)es->page_size, 1, outputFile)) // { /* File write error - arduino prints 1st value nmemb times if nmemb != 1 */ diff --git a/src/query-interface/sort/flash_minsort.c b/src/query-interface/sort/flash_minsort.c index a9835618..caa72bbd 100644 --- a/src/query-interface/sort/flash_minsort.c +++ b/src/query-interface/sort/flash_minsort.c @@ -52,7 +52,8 @@ This is no output sort with block headers and iterator input. Heap used when mov // #define DEBUG 1 // #define DEBUG_OUTPUT 1 // #define DEBUG_READ 1 -#if defined(DEBUG) || defined(DEBUG_OUTPUT) || defined(DEBUG_READ) +// #define MINSORT_END 1 +#if defined(DEBUG) || defined(DEBUG_OUTPUT) || defined(DEBUG_READ) || defined(MINSORT_END) #include "debug_print.h" #else #ifndef debug_log @@ -502,8 +503,9 @@ int flash_minsort( clock_t end = clock(); #endif -#ifdef DEBUG - debug_log("Complete. Comparisons: %d MemCopies: %d\n", metric->num_compar, metric->num_memcpys); +#ifdef MINSORT_END + debug_log("Complete. Comparisons: %d Reads: %d Writes: %d MemCopies: %d\n", metric->num_compar, metric->num_reads, + metric->num_writes, metric->num_memcpys); #endif return 0; // Successful completion diff --git a/src/query-interface/sort/flash_minsort_sublist.c b/src/query-interface/sort/flash_minsort_sublist.c index daa6d778..a3f1700a 100644 --- a/src/query-interface/sort/flash_minsort_sublist.c +++ b/src/query-interface/sort/flash_minsort_sublist.c @@ -49,7 +49,8 @@ // #define DEBUG 1 // #define DEBUG_OUTPUT 1 // #define DEBUG_READ 1 -#if defined(DEBUG) || defined(DEBUG_OUTPUT) || defined(DEBUG_READ) +// #define MINSORT_SUBLIST_END 1 +#if defined(DEBUG) || defined(DEBUG_OUTPUT) || defined(DEBUG_READ) || defined(MINSORT_SUBLIST_END) #include "debug_print.h" #else #ifndef debug_log @@ -429,7 +430,9 @@ int flash_minsort_sublist( free(ms.current); free(ms.next); - // printf("Complete. Comparisons: %d MemCopies: %d TransferIn: %d TransferOut: %d TransferOther: %d\n", metric->num_compar, metric->num_memcpys, numShiftIntoOutput, numShiftOutOutput, numShiftOtherBlock); - +#ifdef MINSORT_SUBLIST_END + debug_log("Complete. Comparisons: %d Reads: %d Writes: %d MemCopies: %d\n", metric->num_compar, metric->num_reads, + metric->num_writes, metric->num_memcpys); +#endif return 0; } diff --git a/src/query-interface/sort/no_output_heap.c b/src/query-interface/sort/no_output_heap.c index bfd7319c..7e6fcc89 100644 --- a/src/query-interface/sort/no_output_heap.c +++ b/src/query-interface/sort/no_output_heap.c @@ -96,7 +96,7 @@ void shiftUp(char* buffer, parent = (idx - 1) / 2; metric->num_compar++; - if (es->compare_fcn(input_tuple, buffer + parent * es->record_size + es->key_offset) >= 0) { + if (es->compare_fcn(input_tuple + es->key_offset, buffer + parent * es->record_size + es->key_offset) >= 0) { break; } metric->num_memcpys++; diff --git a/src/query-interface/sort/sortWrapper.c b/src/query-interface/sort/sortWrapper.c index c07d0e17..17ae67b7 100644 --- a/src/query-interface/sort/sortWrapper.c +++ b/src/query-interface/sort/sortWrapper.c @@ -389,8 +389,8 @@ file_iterator_state_t *startSort(sortData *data, void *unsortedFile, void *sorte int err; - int8_t runGenOnly = false; // Run full sort operation - int8_t writeReadRatio = 19; // 1.97 * 10 => 19 + int8_t runGenOnly = false; // Run full sort operation + int8_t writeReadRatio = 40; err = adaptive_sort(readNextRecord, iteratorState, tuple_buffer, sortedFile, buffer, buffer_max_pages, &es, &result_file_ptr, &metrics, data->compareFn, runGenOnly, writeReadRatio, data); #ifdef PRINT_METRIC diff --git a/test/test_in_memort_quick_sort/test_in_memory_quick_sort.cpp b/test/test_in_memory_quick_sort/test_in_memory_quick_sort.cpp similarity index 100% rename from test/test_in_memort_quick_sort/test_in_memory_quick_sort.cpp rename to test/test_in_memory_quick_sort/test_in_memory_quick_sort.cpp diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index 6636b553..d6a3c797 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -147,6 +147,19 @@ void insertNValues(embedDBState* state, int32_t n, int8_t mode) { } } +void insertNValues2(embedDBState* state, int32_t n) { + int32_t key = 0, value = 0; + + for (int32_t i = 0; i < n; i++) { + if (i % 10 == 0) { + value = 0; + } + embedDBPut(state, &key, &value); + key++; + value++; + } +} + void debugBinData(embedDBOperator* op, uint32_t numValues, uint8_t col) { op->init(op); int32_t* buffer = (int32_t*)op->recordBuffer; @@ -154,14 +167,47 @@ void debugBinData(embedDBOperator* op, uint32_t numValues, uint8_t col) { for (uint32_t i = 0; i <= numValues; ++i) { exec(op); printf("%i ", (int32_t)buffer[col]); + if (i % 10 == 0) + printf("\n"); } printf("\n"); - // fflush(stdout); + //fflush(stdout); } void runTestSequentialValues() { // Insert test data - insertNValues(state, 300, 1); + insertNValues(state, 300, 0); + + embedDBIterator it; + it.minKey = NULL; + it.maxKey = NULL; + it.minData = NULL; + it.maxData = NULL; + embedDBInitIterator(state, &it); + embedDBOperator* scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); + uint8_t projColsOB[] = {0, 1}; + embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); + embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); + orderByOp->init(orderByOp); + + int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; + exec(orderByOp); + int32_t previous = ((int32_t)recordBuffer[1]); + int recordCount = 0; + + while (exec(orderByOp)) { + TEST_ASSERT_GREATER_OR_EQUAL_INT32_MESSAGE(previous, ((int32_t)recordBuffer[1]), "Sort value is not greater than or equal to previous value."); + previous = ((int32_t)recordBuffer[1]); + recordCount++; + } + + orderByOp->close(orderByOp); + embedDBFreeOperatorRecursive(&orderByOp); +} + +void runTestReverseSequentialValues() { + // Insert test data + insertNValues(state, 1000, 1); embedDBIterator it; it.minKey = NULL; @@ -173,8 +219,37 @@ void runTestSequentialValues() { uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); - // debugBinData(orderByOp, 70, 1); + orderByOp->init(orderByOp); + + int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; + exec(orderByOp); + int32_t previous = ((int32_t)recordBuffer[1]); + int recordCount = 0; + + while (exec(orderByOp)) { + TEST_ASSERT_GREATER_OR_EQUAL_INT32_MESSAGE(previous, ((int32_t)recordBuffer[1]), "Sort value is not greater than or equal to previous value."); + previous = ((int32_t)recordBuffer[1]); + recordCount++; + } + orderByOp->close(orderByOp); + embedDBFreeOperatorRecursive(&orderByOp); +} + +void runTestRepeatedValues() { + // Insert test data + insertNValues2(state, 1000); + + embedDBIterator it; + it.minKey = NULL; + it.maxKey = NULL; + it.minData = NULL; + it.maxData = NULL; + embedDBInitIterator(state, &it); + embedDBOperator* scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); + uint8_t projColsOB[] = {0, 1}; + embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); + embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); orderByOp->init(orderByOp); int32_t* recordBuffer = (int32_t*)orderByOp->recordBuffer; @@ -205,14 +280,9 @@ void runTestUsingSEA100k() { embedDBInitIterator(state, &it); embedDBOperator* scanOpOrderBy = createTableScanOperator(state, &it, baseSchema); - // debugBinData(scanOpOrderBy, 200, 0); uint8_t projColsOB[] = {0, 1}; embedDBOperator* projColsOrderBy = createProjectionOperator(scanOpOrderBy, 2, projColsOB); - // int32_t selVal = 100; - // embedDBOperator* selectOp = createSelectionOperator(scanOpOrderBy, 3, SELECT_GTE, &selVal); - // debugBinData(projColsOrderBy, 300, 1); embedDBOperator* orderByOp = createOrderByOperator(state, projColsOrderBy, 1, -1, int32Comparator); - // debugBinData(orderByOp, 100, 1); orderByOp->init(orderByOp); @@ -232,6 +302,8 @@ void runTestUsingSEA100k() { int runUnityTests() { UNITY_BEGIN(); RUN_TEST(runTestSequentialValues); + RUN_TEST(runTestReverseSequentialValues); + RUN_TEST(runTestRepeatedValues); RUN_TEST(runTestUsingSEA100k); return UNITY_END(); } From f970e173e1323ed78d92246b6b307ebdc10e68a4 Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 27 Apr 2026 12:39:59 -0700 Subject: [PATCH 5/7] removed unecessary + 9 to cost calculation --- src/query-interface/sort/adaptive_sort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query-interface/sort/adaptive_sort.c b/src/query-interface/sort/adaptive_sort.c index 9d7e30e8..67ea0015 100644 --- a/src/query-interface/sort/adaptive_sort.c +++ b/src/query-interface/sort/adaptive_sort.c @@ -167,7 +167,7 @@ int adaptive_sort( avgDistinct = 16; int16_t numPasses = (int)ceil(log(es->num_pages / bufferSizeInBlocks) / log(bufferSizeInBlocks)); - int32_t nobSortCost = (numPasses * (10 + writeToReadRatio) + 9) / 10; + int32_t nobSortCost = (numPasses * (10 + writeToReadRatio)) / 10; #ifdef ADAPTIVE_SORT_PRINT debug_log("Adaptive calculation.\n"); From a657aacc63734b94352ed2cd8e529ef61881d587 Mon Sep 17 00:00:00 2001 From: Alex Argatoff Date: Mon, 27 Apr 2026 12:47:21 -0700 Subject: [PATCH 6/7] updated clang formatting --- src/benchmarks/sortBenchmark.h | 3 +-- test/test_sort/test_sort_query_interface.cpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/benchmarks/sortBenchmark.h b/src/benchmarks/sortBenchmark.h index 7ba994c9..89cc3172 100644 --- a/src/benchmarks/sortBenchmark.h +++ b/src/benchmarks/sortBenchmark.h @@ -58,7 +58,6 @@ void insertNValues(embedDBState* state, int32_t n) { embedDBPut(state, &key, &value); key++; value++; - } } void sort_order_last(int32_t numValues, embedDBState* stateUWA, embedDBSchema* baseSchema); @@ -115,7 +114,7 @@ int sortQueryBenchmark() { // Insert data const char datafileName[] = "data/uwa500K.bin"; - //insertData(stateUWA, datafileName); + // insertData(stateUWA, datafileName); insertNValues(stateUWA, 500000); #ifdef ARDUINO diff --git a/test/test_sort/test_sort_query_interface.cpp b/test/test_sort/test_sort_query_interface.cpp index d6a3c797..958ce964 100644 --- a/test/test_sort/test_sort_query_interface.cpp +++ b/test/test_sort/test_sort_query_interface.cpp @@ -171,7 +171,7 @@ void debugBinData(embedDBOperator* op, uint32_t numValues, uint8_t col) { printf("\n"); } printf("\n"); - //fflush(stdout); + // fflush(stdout); } void runTestSequentialValues() { From d7267290515bd90af19d04d1b639dc15694cdd19 Mon Sep 17 00:00:00 2001 From: xelArga Date: Tue, 28 Apr 2026 10:00:47 -0700 Subject: [PATCH 7/7] removed unecessary flag in pio.ini --- platformio.ini | 3 --- 1 file changed, 3 deletions(-) diff --git a/platformio.ini b/platformio.ini index b43338e4..4120890a 100644 --- a/platformio.ini +++ b/platformio.ini @@ -7,9 +7,6 @@ ; ; Please visit documentation for the other options and examples ; https://docs.platformio.org/page/projectconf.html -[platformio] -test_transport = custom - [env:desktop] platform = native test_build_src = true