diff --git a/.gitignore b/.gitignore index ccbd28d..8ed0b24 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ .settings/ .project .cproject +.vscode/ bin/ obj/ inputResource/*.csv diff --git a/profile.sh b/profile.sh index cde1395..3534615 100755 --- a/profile.sh +++ b/profile.sh @@ -11,7 +11,7 @@ export OMP_PROC_BIND=close #export OMP_PLACES={2}:64:1 export OMP_PLACES=threads -dataDir=./inputResource/ +dataDir=../ #STRUCTURES=(adListChunked adListShared degAwareRHH stinger) #STRUCTURES=(graphite adListChunked adListShared degAwareRHH stinger) STRUCTURES=(graphTango) @@ -32,10 +32,8 @@ ALGORITHMS=( # Max num_nodes to initialize for each dataset declare -A DATASETS DATASETS=( - [LiveJournal.csv]=4847571 - [orkut.csv]=3072441 - [wiki-topcats.csv]=1791489 - [WikiTalk.csv]=2394385 + [orkut.el]=3072441 +# [twitter.el]=61578415 # [rmat_1_1.csv]=1048576 # [rmat_1_2.csv]=1048576 # [rmat_1_4.csv]=1048576 diff --git a/src/dynamic/GraphTango.h b/src/dynamic/GraphTango.h index 2feb6e2..7263df7 100644 --- a/src/dynamic/GraphTango.h +++ b/src/dynamic/GraphTango.h @@ -43,27 +43,409 @@ class GraphTango : public dataStruc { #ifdef CALC_MEM_PER_EDGE cout << "Total memory req: " << globalAllocator.totMem << endl; #endif + } + +#if defined(USE_GT_LOAD_BALANCED) + Vertex* vArray; + + //VertexArray vArray; + const int num_threads; + +#if defined(CALC_TYPE_SWITCH) || defined(CALC_DYNNAMIC_TYPE_MAPPING) + typedef struct{ + u64 edgeCnt = 0; + u64 nodeCnt = 0; + u64 switchCnt = 0; + u64 type1 = 0; + u64 type2 = 0; + u64 type3 = 0; + u8 pad[16]; + } ThreadInfo; +#else + typedef struct{ + u64 edgeCnt = 0; + u64 nodeCnt = 0; + vector> inBuckets; + vector> outBuckets; + } ThreadInfo; +#endif + + alignas(64) ThreadInfo thInfo[32]; + + GraphTango(bool weighted, bool directed, i64 numNodes, i64 numThreads) : dataStruc(weighted, directed), num_threads(numThreads){ +#ifdef _OPENMP + if(numThreads > 0){ + omp_set_num_threads(numThreads); + } +#endif + vArray = (Vertex*)globalAllocator.allocate(sizeof(Vertex) * numNodes); + + #pragma omp parallel for + for(u64 i = 0; i < numNodes; i++){ + vArray[i].inEdges.degree = 0; + vArray[i].inEdges.capacity = EdgeArray::TH0; + + vArray[i].outEdges.degree = 0; + vArray[i].outEdges.capacity = EdgeArray::TH0; + } + + #pragma omp parallel for + for(i32 i = 0; i < numThreads; i++){ + thInfo[i].inBuckets.resize(LB_NUMBER_OF_BUCKETS); + thInfo[i].outBuckets.resize(LB_NUMBER_OF_BUCKETS); + } + + cout << "TH0: " << EdgeArray::TH0 << endl; + cout << "TH1: " << EdgeArray::TH1 << endl; + cout << "Sizeof ThreadInfo: " << sizeof(ThreadInfo) << endl; + cout << "Sizeof EdgeArray: " << sizeof(EdgeArray) << endl; + cout << "Sizeof Vertex: " << sizeof(Vertex) << endl; + +#ifdef _OPENMP + cout << "Max threads: " << omp_get_max_threads() << endl; +#endif + + property.resize(numNodes, -1); + affected.resize(numNodes); + affected.fill(false); + } + + virtual ~GraphTango(){ +#ifdef CALC_STATIC_TYPE_MAPPING + u64 type1 = 0; + u64 type2 = 0; + u64 type3 = 0; + for(u64 i = 0; i < num_nodes; i++){ + u64 numNeigh = in_degree(i); + if(numNeigh <= EdgeArray::TH0){ + type1++; + } + else if(numNeigh <= EdgeArray::TH1){ + type2++; + } + else{ + type3++; + } + numNeigh = out_degree(i); + if(numNeigh <= EdgeArray::TH0){ + type1++; + } + else if(numNeigh <= EdgeArray::TH1){ + type2++; + } + else{ + type3++; + } + } + cout << "Static type mapping: \n\tType1: " << type1 << "\t\tType2: " << type2 << "\t\tType3: " << type3 << endl; +#endif +#ifdef CALC_DYNNAMIC_TYPE_MAPPING + u64 dynType1 = 0; + u64 dynType2 = 0; + u64 dynType3 = 0; + for(u64 i = 0; i < num_threads; i++){ + dynType1 += thInfo[i].type1; + dynType2 += thInfo[i].type2; + dynType3 += thInfo[i].type3; + } + cout << "Dynamic type mapping: \n\tType1: " << dynType1 << "\t\tType2: " << dynType2 << "\t\tType3: " << dynType3 << endl; +#endif +#ifdef CALC_TYPE_SWITCH + u32 switchCnt = 0; + for(u64 i = 0; i < num_threads; i++){ + switchCnt += thInfo[i].switchCnt; + } + cout << "Switch Count: " << switchCnt << endl; +#endif + } + + int64_t in_degree(NodeID n) override { + return vArray[n].inEdges.degree; + } + + int64_t out_degree(NodeID n) override { + return vArray[n].outEdges.degree; + } + + void update(const EdgeList& el) override { + //probe = 0; + const u64 batchSize = el.size(); + +#ifdef _OPENMP + const u64 elemPerTh = ceil(1.0 * batchSize / num_threads); + + //clear the buckets + #pragma omp parallel for + for(i32 i = 0; i < num_threads; i++){ + ThreadInfo& th = thInfo[i]; + for(u32 j = 0; j < LB_NUMBER_OF_BUCKETS; j++){ + th.inBuckets[j].clear(); + th.outBuckets[j].clear(); + } + } + + //distribute edges to the buckets + #pragma omp parallel + { + const i64 actualTh = omp_get_thread_num(); + ThreadInfo& th = thInfo[actualTh]; + const u64 startIdx = actualTh * elemPerTh; + const u64 endIdx = (startIdx + elemPerTh) > batchSize ? batchSize : (startIdx + elemPerTh); + + for(u64 i = startIdx; i < endIdx; i++){ + const i64 src = el[i].source; + const i64 dst = el[i].destination; + if(!el[i].sourceExists){ + th.nodeCnt++; + } + if(!el[i].destExists){ + th.nodeCnt++; + } + const u64 srcBucketIdx = (src / 64) % LB_NUMBER_OF_BUCKETS; + const u64 dstBucketIdx = (dst / 64) % LB_NUMBER_OF_BUCKETS; + + th.inBuckets[dstBucketIdx].push_back(el[i]); + th.outBuckets[srcBucketIdx].push_back(el[i]); + } + } + + + u64 nextBucketIdx = 0; + + //process the buckets + #pragma omp parallel + { + const i64 actualTh = omp_get_thread_num(); + while(true){ + u64 currBucketId; + #pragma omp atomic capture + currBucketId = nextBucketIdx++; + if(currBucketId < LB_NUMBER_OF_BUCKETS){ + u64 garbage; + //in bucket + for(u32 thId = 0; thId < num_threads; thId++){ + const vector& bucket = thInfo[thId].inBuckets[currBucketId]; + for(const Edge& e : bucket){ + const i64 src = e.source; + const i64 dst = e.destination; + if(!affected[dst]){ + affected[dst] = true; + } + if(!e.isDelete){ //insertion to inEdges + vArray[dst].inEdges.insertEdge(src, e.weight, garbage); + } + else{ //deletion from inEdges + vArray[dst].inEdges.deleteEdge(src, garbage); + } + } + } + } + else if(currBucketId < LB_NUMBER_OF_BUCKETS * 2){ + //out bucket + currBucketId = currBucketId - LB_NUMBER_OF_BUCKETS; + for(u32 thId = 0; thId < num_threads; thId++){ + const vector& bucket = thInfo[thId].outBuckets[currBucketId]; + for(const Edge& e : bucket){ + const i64 src = e.source; + const i64 dst = e.destination; + if(!affected[src]){ + affected[src] = true; + } + if(!e.isDelete){ //insertion to outEdges + vArray[src].outEdges.insertEdge(dst, e.weight, thInfo[actualTh].edgeCnt); + } + else{ //deletion from outEdges + vArray[src].outEdges.deleteEdge(dst, thInfo[actualTh].edgeCnt); + } + } + } + } + else{ + break; + } + } + } -// std::cout << "Inserts--------------------" << std::endl; -// std::cout << " Total: " << insTot << std::endl; -// std::cout << " Succ : " << insSucc << std::endl; -// std::cout << " Fail : " << insTot - insSucc << std::endl; -// std::cout << std::endl; // -// std::cout << "Deletes--------------------" << std::endl; -// std::cout << " Total: " << delTot << std::endl; -// std::cout << " Succ : " << delSucc << std::endl; -// std::cout << " Fail : " << delTot - delSucc << std::endl; -// std::cout << std::endl; // -// std::cout << "Final number of edges: " << insSucc - delSucc << std::endl; -// ofstream out("probing_dist.csv"); -// for(auto it : probingDist){ -// out << it.first << "," << it.second << endl; +// +// +// //int thMask = (1 << getNextPow2Log2(num_threads)) - 1; +// +// #pragma omp parallel +// { +// const i64 actualTh = omp_get_thread_num(); +// LIKWID_MARKER_START("upd"); +// for(u64 i = 0; i < batchSize; i++){ +// const i64 src = el[i].source; +// const i64 dst = el[i].destination; +// +// //i64 targetTh = (src / 64) & thMask; +// i64 targetTh = (src / 64) % num_threads; +// if(targetTh == actualTh){ +// if(!el[i].sourceExists){ +// thInfo[actualTh].nodeCnt++; +// } +// if(!el[i].destExists){ +// thInfo[actualTh].nodeCnt++; +// } +// +// if(!affected[src]){ +// affected[src] = true; +// } +// +// #ifdef CALC_TYPE_SWITCH +// VType initType = VType::VTYPE_3; +// if(vArray[src].outEdges.capacity <= EdgeArray::TH0){ +// initType = VType::VTYPE_1; +// } +// else if(vArray[src].outEdges.capacity <= EdgeArray::TH1){ +// initType = VType::VTYPE_2; +// } +// #endif +// +// if(!el[i].isDelete){ +// //insert out edge +// vArray[src].outEdges.insertEdge(dst, el[i].weight, thInfo[actualTh].edgeCnt); +// } +// else{ +// //delete out edge +// vArray[src].outEdges.deleteEdge(dst, thInfo[actualTh].edgeCnt); +// } +// +// #ifdef CALC_TYPE_SWITCH +// VType finType = VType::VTYPE_3; +// if(vArray[src].outEdges.capacity <= EdgeArray::TH0){ +// finType = VType::VTYPE_1; +// } +// else if(vArray[src].outEdges.capacity <= EdgeArray::TH1){ +// finType = VType::VTYPE_2; +// } +// if(initType != finType){ +// thInfo[actualTh].switchCnt++; +// } +// #endif +// +// #ifdef CALC_DYNNAMIC_TYPE_MAPPING +// if(vArray[src].outEdges.capacity <= EdgeArray::TH0){ +// thInfo[actualTh].type1++; +// } +// else if(vArray[src].outEdges.capacity <= EdgeArray::TH1){ +// thInfo[actualTh].type2++; +// } +// else{ +// thInfo[actualTh].type3++; +// } +// #endif +// } +// +// //targetTh = (dst / 64) & thMask; +// targetTh = (dst / 64) % num_threads; +// if(targetTh == actualTh){ +// if(!affected[dst]){ +// affected[dst] = true; +// } +// +// #ifdef CALC_TYPE_SWITCH +// VType initType = VType::VTYPE_3; +// if(vArray[dst].inEdges.capacity <= EdgeArray::TH0){ +// initType = VType::VTYPE_1; +// } +// else if(vArray[dst].inEdges.capacity <= EdgeArray::TH1){ +// initType = VType::VTYPE_2; +// } +// #endif +// +// u64 garbage; +// if(!el[i].isDelete){ +// //insert in edge +// vArray[dst].inEdges.insertEdge(src, el[i].weight, garbage); +// } +// else{ +// //delete in edge +// vArray[dst].inEdges.deleteEdge(src, garbage); +// } +// +// #ifdef CALC_TYPE_SWITCH +// VType finType = VType::VTYPE_3; +// if(vArray[dst].inEdges.capacity <= EdgeArray::TH0){ +// finType = VType::VTYPE_1; +// } +// else if(vArray[dst].inEdges.capacity <= EdgeArray::TH1){ +// finType = VType::VTYPE_2; +// } +// if(initType != finType){ +// thInfo[actualTh].switchCnt++; +// } +// #endif +// +// #ifdef CALC_DYNNAMIC_TYPE_MAPPING +// if(vArray[dst].inEdges.capacity <= EdgeArray::TH0){ +// thInfo[actualTh].type1++; +// } +// else if(vArray[dst].inEdges.capacity <= EdgeArray::TH1){ +// thInfo[actualTh].type2++; +// } +// else{ +// thInfo[actualTh].type3++; +// } +// #endif +// } +// +// } +// LIKWID_MARKER_STOP("upd"); // } +#else + for(u64 i = 0; i < batchSize; i++){ + const u64 src = el[i].source; + const u64 dst = el[i].destination; + + if(!el[i].sourceExists){ + thInfo[0].nodeCnt++; + } + if(!el[i].destExists){ + thInfo[0].nodeCnt++; + } + //we do not need atomic operation on affected as long as "some" thread updates it + if(!affected[src]){ + affected[src] = true; + } + if(!affected[dst]){ + affected[dst] = true; + } + + u64 garbage; + if(!el[i].isDelete){ + //insertion + //insert out edge + vArray[src].outEdges.insertEdge(dst, el[i].weight, thInfo[0].edgeCnt); + + //insert in edge + vArray[dst].inEdges.insertEdge(src, el[i].weight, garbage); + } + else{ + //delete out edge + vArray[src].outEdges.deleteEdge(dst, thInfo[0].edgeCnt); + + //delete in edge + vArray[dst].inEdges.deleteEdge(src, garbage); + } + } +#endif + + for(u64 i = 0; i < num_threads; i++){ + num_edges += thInfo[i].edgeCnt; + thInfo[i].edgeCnt = 0; + num_nodes += thInfo[i].nodeCnt; + thInfo[i].nodeCnt = 0; + } + //num_nodes = el[batchSize - 1].lastAssignedId + 1; } +#endif + + #ifdef USE_HYBRID_HASHMAP VertexArray vArray; @@ -791,12 +1173,15 @@ class GraphTango : public dataStruc { //VertexArray vArray; const int num_threads; -#ifdef CALC_TYPE_SWITCH +#if defined(CALC_TYPE_SWITCH) || defined(CALC_DYNNAMIC_TYPE_MAPPING) typedef struct{ u64 edgeCnt = 0; u64 nodeCnt = 0; u64 switchCnt = 0; - u8 pad[40]; + u64 type1 = 0; + u64 type2 = 0; + u64 type3 = 0; + u8 pad[16]; } ThreadInfo; #else typedef struct{ @@ -840,7 +1225,46 @@ class GraphTango : public dataStruc { affected.fill(false); } - ~GraphTango(){ + virtual ~GraphTango(){ +#ifdef CALC_STATIC_TYPE_MAPPING + u64 type1 = 0; + u64 type2 = 0; + u64 type3 = 0; + for(u64 i = 0; i < num_nodes; i++){ + u64 numNeigh = in_degree(i); + if(numNeigh <= EdgeArray::TH0){ + type1++; + } + else if(numNeigh <= EdgeArray::TH1){ + type2++; + } + else{ + type3++; + } + numNeigh = out_degree(i); + if(numNeigh <= EdgeArray::TH0){ + type1++; + } + else if(numNeigh <= EdgeArray::TH1){ + type2++; + } + else{ + type3++; + } + } + cout << "Static type mapping: \n\tType1: " << type1 << "\t\tType2: " << type2 << "\t\tType3: " << type3 << endl; +#endif +#ifdef CALC_DYNNAMIC_TYPE_MAPPING + u64 dynType1 = 0; + u64 dynType2 = 0; + u64 dynType3 = 0; + for(u64 i = 0; i < num_threads; i++){ + dynType1 += thInfo[i].type1; + dynType2 += thInfo[i].type2; + dynType3 += thInfo[i].type3; + } + cout << "Dynamic type mapping: \n\tType1: " << dynType1 << "\t\tType2: " << dynType2 << "\t\tType3: " << dynType3 << endl; +#endif #ifdef CALC_TYPE_SWITCH u32 switchCnt = 0; for(u64 i = 0; i < num_threads; i++){ @@ -889,10 +1313,10 @@ class GraphTango : public dataStruc { #ifdef CALC_TYPE_SWITCH VType initType = VType::VTYPE_3; - if(vArray[src].outEdges.capacity <= vArray[src].outEdges.TH0){ + if(vArray[src].outEdges.capacity <= EdgeArray::TH0){ initType = VType::VTYPE_1; } - else if(vArray[src].outEdges.capacity <= vArray[src].outEdges.TH1){ + else if(vArray[src].outEdges.capacity <= EdgeArray::TH1){ initType = VType::VTYPE_2; } #endif @@ -908,16 +1332,28 @@ class GraphTango : public dataStruc { #ifdef CALC_TYPE_SWITCH VType finType = VType::VTYPE_3; - if(vArray[src].outEdges.capacity <= vArray[src].outEdges.TH0){ + if(vArray[src].outEdges.capacity <= EdgeArray::TH0){ finType = VType::VTYPE_1; } - else if(vArray[src].outEdges.capacity <= vArray[src].outEdges.TH1){ + else if(vArray[src].outEdges.capacity <= EdgeArray::TH1){ finType = VType::VTYPE_2; } if(initType != finType){ thInfo[actualTh].switchCnt++; } #endif + + #ifdef CALC_DYNNAMIC_TYPE_MAPPING + if(vArray[src].outEdges.capacity <= EdgeArray::TH0){ + thInfo[actualTh].type1++; + } + else if(vArray[src].outEdges.capacity <= EdgeArray::TH1){ + thInfo[actualTh].type2++; + } + else{ + thInfo[actualTh].type3++; + } + #endif } //targetTh = (dst / 64) & thMask; @@ -929,10 +1365,10 @@ class GraphTango : public dataStruc { #ifdef CALC_TYPE_SWITCH VType initType = VType::VTYPE_3; - if(vArray[dst].inEdges.capacity <= vArray[dst].inEdges.TH0){ + if(vArray[dst].inEdges.capacity <= EdgeArray::TH0){ initType = VType::VTYPE_1; } - else if(vArray[dst].inEdges.capacity <= vArray[dst].inEdges.TH1){ + else if(vArray[dst].inEdges.capacity <= EdgeArray::TH1){ initType = VType::VTYPE_2; } #endif @@ -949,16 +1385,28 @@ class GraphTango : public dataStruc { #ifdef CALC_TYPE_SWITCH VType finType = VType::VTYPE_3; - if(vArray[dst].inEdges.capacity <= vArray[dst].inEdges.TH0){ + if(vArray[dst].inEdges.capacity <= EdgeArray::TH0){ finType = VType::VTYPE_1; } - else if(vArray[dst].inEdges.capacity <= vArray[dst].inEdges.TH1){ + else if(vArray[dst].inEdges.capacity <= EdgeArray::TH1){ finType = VType::VTYPE_2; } if(initType != finType){ thInfo[actualTh].switchCnt++; } #endif + + #ifdef CALC_DYNNAMIC_TYPE_MAPPING + if(vArray[dst].inEdges.capacity <= EdgeArray::TH0){ + thInfo[actualTh].type1++; + } + else if(vArray[dst].inEdges.capacity <= EdgeArray::TH1){ + thInfo[actualTh].type2++; + } + else{ + thInfo[actualTh].type3++; + } + #endif } } @@ -1007,10 +1455,6 @@ class GraphTango : public dataStruc { thInfo[i].edgeCnt = 0; num_nodes += thInfo[i].nodeCnt; thInfo[i].nodeCnt = 0; -#ifdef CALC_TYPE_SWITCH - switchCnt += thInfo[i].switchCnt; - thInfo[i].switchCnt = 0; -#endif } //num_nodes = el[batchSize - 1].lastAssignedId + 1; } @@ -1018,7 +1462,6 @@ class GraphTango : public dataStruc { #endif - #ifdef USE_GT_BALANCED_TYPE3_ONLY Vertex* vArray; diff --git a/src/dynamic/GraphTangoHash.h b/src/dynamic/GraphTangoHash.h deleted file mode 100644 index e43b07f..0000000 --- a/src/dynamic/GraphTangoHash.h +++ /dev/null @@ -1,392 +0,0 @@ -#pragma once - -#include -#include -#include -#include -#include -#include - -#include "common.h" -#include "CustomAllocator.h" - -//typedef uint64_t u64; -//typedef uint32_t u32; -//typedef uint8_t u8; - -#define USE_64_BIT_KEY - -#ifdef USE_64_BIT_KEY -typedef u64 Key; -#else -typedef u32 Key; -#endif - -#define CACHE_LINE_SIZE 64U -#define ELEMS_IN_LINE (CACHE_LINE_SIZE / sizeof(Key)) - -#ifdef USE_64_BIT_KEY -#define FLAG_EMPTY_SLOT 0xFFFFFFFFFFFFFFFFULL -#define FLAG_TOMB_STONE 0xFFFFFFFFFFFFFFFEULL -#define CONST_FACTOR_A 11400714818402812347ULL -#define NUM_INITIAL_ELEMS (5 + 8) -#else -#define FLAG_EMPTY_SLOT 0xFFFFFFFFU -#define FLAG_TOMB_STONE 0xFFFFFFFEU -#define CONST_FACTOR_A 2654435769U -#endif - - - -using namespace std; - -#ifdef USE_CAHCE_FRIENDLY_HASH - -template -class GraphTangoHash{ -public: - -#ifdef USE_64_BIT_KEY - static constexpr u8 rotation[8] = {0, 5, 2, 6, 1, 4, 7, 3}; - constexpr u32 getShiftAmt(){ - return 64 - __builtin_ctzl(capacity / ELEMS_IN_LINE); - } -#else - //static constexpr u8 rotation[16] = {0, 1, 4, 9, 15, 2, 10, 6, 3, 8, 14, 5, 12, 7, 11, 13}; - static constexpr u8 rotation[16] = {0, 9, 6, 15, 2, 11, 4, 13, 3, 7, 10, 14, 5, 1, 8, 12}; - constexpr u32 getShiftAmt(){ - return 32 - __builtin_ctzl(capacity / ELEMS_IN_LINE); - } -#endif - - u32 degree = 0; - u32 capacity = 0; - Neigh* __restrict neighArr = nullptr; - Neigh neigh[NUM_INITIAL_ELEMS]; - - //map probes; - - GraphTangoHash(){ - //capacity = NUM_INITIAL_ELEMS; - //neighArr = (Neigh*)globalAllocator.allocate(capacity * sizeof(Neigh)); - //memset(neighArr, 0xff, capacity * sizeof(Neigh)); - } - - ~GraphTangoHash(){ - //free(neighArr); - } - -// inline u32 hash1(u32 key){ -// return (u32)(key * CONST_FACTOR_A) >> (32 - capacity / ELEMS_IN_LINE + 1); -// } -// -// inline u32 hash2(u32 key){ -// const u32 mask = capacity / ELEMS_IN_LINE - 1; -// return (key & mask) | 1; //return an odd value -// } -// -// u32 find(u32 key){ -// const u32 numCacheLines = capacity / ELEMS_IN_LINE; -// const u32 h1 = hash1(key); -// for(u32 i = 0; i < numCacheLines; i++){ -// const u32 offset = ((h1 + i * hash2(key)) & (numCacheLines - 1)) * ELEMS_IN_LINE; -// //cout << "\t" << offset << endl; -// -// //check all of the brought cache line -// //#pragma GCC unroll 8 -// for(int j = 0; j < ELEMS_IN_LINE; j++){ -// if(neighArr[offset + j] == key){ -// return offset + j; //found -// } -// if(neighArr[offset + j] == FLAG_EMPTY_SLOT){ -// return FLAG_EMPTY_SLOT; //not found -// } -// } -// } -// assert(false); //should never reach here -// return FLAG_EMPTY_SLOT; -// } - - - inline void insertDuringRehash(u32 key){ - const Key cacheLineMask = capacity / ELEMS_IN_LINE - 1; - const Key h1 = (Key)(key * CONST_FACTOR_A) >> getShiftAmt(); //[0,1,...,#cache_lines] - for(u32 i = 0; i <= cacheLineMask; i++){ - const Key h2 = (key & cacheLineMask) | 1; - const Key base = ((h1 + i * h2) & cacheLineMask) * ELEMS_IN_LINE; //cyclic within [0,1,...,#cache_lines] - - //check all elements of the cache line - //#pragma GCC unroll 8 - for(int j = 0; j < ELEMS_IN_LINE; j++){ -#ifdef USE_64_BIT_KEY - const Key idx = base | ((key + rotation[j]) & 0x7); -#else - const Key idx = base | ((key + rotation[j]) & 0xf); -#endif - if(neighArr[idx].node == FLAG_EMPTY_SLOT){ - neighArr[idx].node = key; //successful insertion - return; - } - } - } - } - - void rehash(){ - Neigh* __restrict oldArr = neighArr; - const u32 oldCap = capacity; - - capacity = capacity * 2; - neighArr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh)); - memset(neighArr, 0xff, capacity * sizeof(Key)); //reset new array - for(u32 i = 0; i < oldCap; i++){ - const Key key = oldArr[i].node; - if(key < FLAG_TOMB_STONE){ - insertDuringRehash(key); - } - } - - globalAllocator.freePow2(oldArr, oldCap); - } - - void insert(Key key, u64& edgeCnt){ - if(degree < HYBRID_HASH_PARTITION){ - //linear search - for(u32 i = 0; i < degree; i++){ - if(neighArr[i].node == key){ - //found duplicate, nothing to do - return; - } - } - neighArr[degree].node = key; - degree++; - edgeCnt++; - - if(__builtin_expect(degree == capacity, 0)){ - // Two things can happen now. - // 1. If we reached the partition threshold, switch to hash table - // 2. Otherwise, just grow neighArr - Neigh* __restrict oldArr = neighArr; - const u32 oldCap = capacity; - if(__builtin_expect(degree == HYBRID_HASH_PARTITION, 0)){ - //switch to hash table - capacity = capacity * 4; - neighArr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh)); - memset(neighArr, 0xff, capacity * sizeof(Neigh)); //reset new array - for(u32 i = 0; i < degree; i++){ - insertDuringRehash(oldArr[i].node); - } - } - else{ - //grow neighArr - capacity = getNextPow2MinRet(capacity * 2); - neighArr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh)); - memcpy(neighArr, oldArr, degree * sizeof(Neigh)); - } - if(oldArr != neigh){ - globalAllocator.freePow2(oldArr, oldCap); - } - } - } - else{ - //hash based search - if(__builtin_expect(degree > (capacity / 2), 0)){ - //Load factor is 0.5. Grow table - rehash(); - } - - //u32 probelen = 0; - const Key cacheLineMask = capacity / ELEMS_IN_LINE - 1; - const Key h1 = (Key)(key * CONST_FACTOR_A) >> getShiftAmt(); //[0,1,...,#cache_lines] - for(Key i = 0; i <= cacheLineMask; i++){ - const Key h2 = (key & cacheLineMask) | 1; - const Key base = ((h1 + i * h2) & cacheLineMask) * ELEMS_IN_LINE; //cyclic within [0,1,...,#cache_lines] - - //check all elements of the cache line - //#pragma GCC unroll 8 - for(int j = 0; j < ELEMS_IN_LINE; j++){ - #ifdef USE_64_BIT_KEY - const Key idx = base | ((key + rotation[j]) & 0x7); - #else - const Key idx = base | ((key + rotation[j]) & 0xf); - #endif - //probelen++; - if(neighArr[idx].node >= FLAG_TOMB_STONE){ - neighArr[idx].node = key; //successful insertion - degree++; - edgeCnt++; - //probes[probelen]++; - return; - } - if(neighArr[idx].node == key){ - return; //found, no need to do anything - } - } - } - } - } - - void erase(u32 key){ - - } - -}; - -template -constexpr u8 GraphTangoHash::rotation[8]; - -#endif - - -#ifdef USE_CAHCE_FRIENDLY_HASH_ONLY - -template -class GraphTangoHash{ -public: - -#ifdef USE_64_BIT_KEY - static constexpr u8 rotation[8] = {0, 5, 2, 6, 1, 4, 7, 3}; - constexpr u32 getShiftAmt(){ - return 64 - __builtin_ctzl(capacity / ELEMS_IN_LINE); - } -#else - //static constexpr u8 rotation[16] = {0, 1, 4, 9, 15, 2, 10, 6, 3, 8, 14, 5, 12, 7, 11, 13}; - static constexpr u8 rotation[16] = {0, 9, 6, 15, 2, 11, 4, 13, 3, 7, 10, 14, 5, 1, 8, 12}; - constexpr u32 getShiftAmt(){ - return 32 - __builtin_ctzl(capacity / ELEMS_IN_LINE); - } -#endif - - u32 degree = 0; - u32 capacity = 0; - Neigh* __restrict neighArr = nullptr; - Neigh* __restrict adjList = nullptr; - Neigh neigh[NUM_INITIAL_ELEMS]; - - //map probes; - - GraphTangoHash(){ - //capacity = NUM_INITIAL_ELEMS; - //neighArr = (Neigh*)globalAllocator.allocate(capacity * sizeof(Neigh)); - //memset(neighArr, 0xff, capacity * sizeof(Neigh)); - } - - ~GraphTangoHash(){ - //free(neighArr); - } - - inline void insertDuringRehash(u32 key){ - const Key cacheLineMask = capacity / ELEMS_IN_LINE - 1; - const Key h1 = (Key)(key * CONST_FACTOR_A) >> getShiftAmt(); //[0,1,...,#cache_lines] - for(u32 i = 0; i <= cacheLineMask; i++){ - const Key h2 = (key & cacheLineMask) | 1; - const Key base = ((h1 + i * h2) & cacheLineMask) * ELEMS_IN_LINE; //cyclic within [0,1,...,#cache_lines] - - //check all elements of the cache line - //#pragma GCC unroll 8 - for(int j = 0; j < ELEMS_IN_LINE; j++){ -#ifdef USE_64_BIT_KEY - const Key idx = base | ((key + rotation[j]) & 0x7); -#else - const Key idx = base | ((key + rotation[j]) & 0xf); -#endif - if(neighArr[idx].node == FLAG_EMPTY_SLOT){ - neighArr[idx].node = key; //successful insertion - return; - } - } - } - } - - void rehash(){ - Neigh* __restrict oldArr = neighArr; - const u32 oldCap = capacity; - - capacity = capacity * 2; - neighArr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh)); - memset(neighArr, 0xff, capacity * sizeof(Key)); //reset new array - for(u32 i = 0; i < oldCap; i++){ - const Key key = oldArr[i].node; - if(key < FLAG_TOMB_STONE){ - insertDuringRehash(key); - } - } - - globalAllocator.freePow2(oldArr, oldCap); - } - - void insert(Key key, u64& edgeCnt){ - if(degree == NUM_INITIAL_ELEMS){ - //switch to hash table, nut key is not yet inserted - capacity = getNextPow2MinRet(NUM_INITIAL_ELEMS * 4); - neighArr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh)); - memset(neighArr, 0xff, capacity * sizeof(Neigh)); //reset new array - for(u32 i = 0; i < NUM_INITIAL_ELEMS; i++){ - insertDuringRehash(neigh[i].node); - } - } - - if(degree < NUM_INITIAL_ELEMS){ - //linear search - for(u32 i = 0; i < degree; i++){ - if(neigh[i].node == key){ - //found duplicate, nothing to do - return; - } - } - neigh[degree].node = key; - degree++; - edgeCnt++; - } - else{ - //hash based search - if(__builtin_expect(degree > (capacity / 2), 0)){ - //Load factor is 0.5. Grow table - rehash(); - } - - //u32 probelen = 0; - const Key cacheLineMask = capacity / ELEMS_IN_LINE - 1; - const Key h1 = (Key)(key * CONST_FACTOR_A) >> getShiftAmt(); //[0,1,...,#cache_lines] - for(Key i = 0; i <= cacheLineMask; i++){ - const Key h2 = (key & cacheLineMask) | 1; - const Key base = ((h1 + i * h2) & cacheLineMask) * ELEMS_IN_LINE; //cyclic within [0,1,...,#cache_lines] - - //check all elements of the cache line - //#pragma GCC unroll 8 - for(int j = 0; j < ELEMS_IN_LINE; j++){ - #ifdef USE_64_BIT_KEY - const Key idx = base | ((key + rotation[j]) & 0x7); - #else - const Key idx = base | ((key + rotation[j]) & 0xf); - #endif - //probelen++; - if(neighArr[idx].node >= FLAG_TOMB_STONE){ - neighArr[idx].node = key; //successful insertion - degree++; - edgeCnt++; - //probes[probelen]++; - adjList = nullptr; //adjacency list no longer valid - return; - } - if(neighArr[idx].node == key){ - return; //found, no need to do anything - } - } - } - } - } - - void erase(Key key, u64& edgeCnt){ - - } - -}; - -template -constexpr u8 GraphTangoHash::rotation[8]; - -#endif - - - - diff --git a/src/dynamic/LockFreePoolWithList.h b/src/dynamic/LockFreePoolWithList.h index 6798e8d..c36abb4 100644 --- a/src/dynamic/LockFreePoolWithList.h +++ b/src/dynamic/LockFreePoolWithList.h @@ -11,7 +11,7 @@ #define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT) -template +template class LockFreePoolWithList { alignas(64) void* __restrict nextFreePtrs[MAX_THREADS][MAX_SEGMENT_BITS]; @@ -63,6 +63,7 @@ class LockFreePoolWithList { } void* allocPow2(u64 size){ + assert(isPowOf2(size)); return allocLog2(getPow2Log2(size)); } @@ -81,6 +82,7 @@ class LockFreePoolWithList { } void freePow2(void* __restrict ptr, u64 size){ + assert(isPowOf2(size)); freeLog2(ptr, getPow2Log2(size)); } diff --git a/src/dynamic/Vertex.h b/src/dynamic/Vertex.h index 0dd543b..9da03ef 100644 --- a/src/dynamic/Vertex.h +++ b/src/dynamic/Vertex.h @@ -23,10 +23,11 @@ typedef robin_hood::unordered_flat_map graphite_hashmap; typedef tsl::robin_map graphite_hashmap; #else -typedef std::unordered_map::hasher, std::unordered_map::key_equal, custom_allocator< std::pair> > graphite_hashmap; +typedef std::unordered_map::hasher, + std::unordered_map::key_equal, + custom_allocator> > graphite_hashmap; #endif - #ifdef USE_HYBRID_HASHMAP_WITH_GROUPING template @@ -49,7 +50,6 @@ class Vertex{ #endif - #ifdef USE_HYBRID_HASHMAP_WITH_GROUPING_TIGHTER #define INITIAL_EDGES 5 @@ -75,60 +75,71 @@ class Vertex{ #endif - -#if defined(USE_GT_BALANCED) || defined(USE_GT_BALANCED_DYN_PARTITION) +#if defined(USE_GT_BALANCED) || defined(USE_GT_BALANCED_DYN_PARTITION) || defined(USE_GT_LOAD_BALANCED) #define FLAG_EMPTY_SLOT 0xFFFFFFFFU #define FLAG_TOMB_STONE 0xFFFFFFFEU #define CACHE_LINE_SIZE 64 -template -class alignas(CACHE_LINE_SIZE) EdgeArray{ +template +class alignas(CACHE_LINE_SIZE) EdgeArray { private: - void rebuildHashTable(u64 oldCap, u64 newCap){ - if(oldCap > TH1){ - //free old map - globalAllocator.freePow2(etype.type2_3.mapArr, oldCap * 2 * sizeof(DstLocPair)); - etype.type2_3.mapArr = nullptr; - } - - if(newCap > TH1){ + void rebuildHashTable(u64 oldCap, u64 newCap) { + //if newCap == power of 2 + if (!(newCap & (newCap - 1)) + && (etype.type3.mapCapacity != (newCap * 2))) { + if (etype.type3.mapCapacity) { + globalAllocator.freePow2(etype.type3.mapArr, + etype.type3.mapCapacity * sizeof(DstLocPair)); + } //allocate new map - etype.type2_3.mapArr = (DstLocPair*)globalAllocator.allocate(newCap * 2 * sizeof(DstLocPair)); + etype.type3.mapCapacity = newCap * 2; + etype.type3.mapArr = (DstLocPair*) globalAllocator.allocate( + etype.type3.mapCapacity * sizeof(DstLocPair)); + DstLocPair *__restrict locMap = etype.type3.mapArr; + memset(locMap, -1, etype.type3.mapCapacity * sizeof(DstLocPair)); - DstLocPair* __restrict locMap = etype.type2_3.mapArr; - memset(locMap, -1, newCap * 2 * sizeof(DstLocPair)); + const u32 mask = etype.type3.mapCapacity - 1; - const u32 mask = newCap * 2 - 1; + assert(degree < etype.type3.mapCapacity); //add existing nodes to hash - const Neigh* __restrict nn = etype.type2_3.neighArr; - for(u64 i = 0; i < degree; i++){ - const u32 dst = nn[i].node; - u32 idx = dst & mask; - while(true){ - if(locMap[idx].dst == FLAG_EMPTY_SLOT){ - //found insertion point - locMap[idx].dst = dst; - locMap[idx].loc = i; - break; - } - //move on - idx++; - if(idx == (newCap * 2)){ - idx = 0; + u64 loc = 0; + u32 blockNum = degree / BLOCK_SIZE; + assert(degree % BLOCK_SIZE == 0); + for (u32 j = 0; j < blockNum; j++) { + const Neigh *nn = etype.type3.blockList[j]; + for (u64 i = 0; i < BLOCK_SIZE; i++) { + const u32 dst = nn[i].node; + assert((i32 )dst >= 0); + u32 idx = dst & mask; + while (true) { + if (locMap[idx].dst == FLAG_EMPTY_SLOT) { + //found insertion point + locMap[idx].dst = dst; + locMap[idx].loc = loc; + break; + } + //move on + idx++; + if (idx == (etype.type3.mapCapacity)) { + idx = 0; + } } + loc++; } } } - } + } public: - const static u64 TH0 = ((CACHE_LINE_SIZE - sizeof(u32) - sizeof(u32)) / sizeof(Neigh)); + const static u64 TH0 = ((CACHE_LINE_SIZE - sizeof(u32) - sizeof(u32)) + / sizeof(Neigh)); const static u64 TH1 = HYBRID_HASH_PARTITION; + const static u64 BLOCK_SIZE = TH1; // when type3 fills out the existing block(s), we allocate a new block of size BLOCK_SIZE. u32 degree = 0; u32 capacity = TH0; @@ -139,209 +150,261 @@ class alignas(CACHE_LINE_SIZE) EdgeArray{ } type1; struct { - Neigh* __restrict neighArr = nullptr; - DstLocPair* __restrict mapArr = nullptr; - } type2_3; + Neigh *neighArr = nullptr; + } type2; + + struct { + DstLocPair *__restrict mapArr = nullptr; + vector blockList; + u64 mapCapacity; + } type3; + } etype; u8 __pad[CACHE_LINE_SIZE - sizeof(u32) - sizeof(u32) - sizeof(etype)]; - void insertEdge(const Idx dstId, const Weight weight, u64& edgeCnt){ + void insertEdge(const Idx dstId, const Weight weight, u64 &edgeCnt) { + assert(dstId >= 0); //First, check if needs expanding - if(__builtin_expect(degree == capacity, false)){ - capacity = getNextPow2(capacity * 2); - Neigh* __restrict newPtr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh)); + if (__builtin_expect(degree == capacity, false)) { - if(degree <= TH0){ //Going from Type 1 to Type 2 + if (degree == TH0) { //Type 1 => Type 2 + capacity = getNextPow2(capacity * 2); + Neigh *__restrict newPtr = (Neigh*) globalAllocator.allocPow2( + capacity * sizeof(Neigh)); memcpy(newPtr, etype.type1.neigh, degree * sizeof(Neigh)); - etype.type2_3.mapArr = nullptr; - } - else{ //Type 2 or 3 - memcpy(newPtr, etype.type2_3.neighArr, degree * sizeof(Neigh)); - globalAllocator.freePow2(etype.type2_3.neighArr, capacity / 2 * sizeof(Neigh)); - } - etype.type2_3.neighArr = newPtr; - - //Grow hash table if needed - rebuildHashTable(capacity / 2, capacity); - } - - Neigh* __restrict currNeighArr; - - if(capacity <= TH0){ + etype.type2.neighArr = newPtr; + } else if ((degree * 2) <= TH1) { // Type 2 => Type 2 + capacity = capacity * 2; + Neigh *__restrict newPtr = (Neigh*) globalAllocator.allocPow2( + capacity * sizeof(Neigh)); + memcpy(newPtr, etype.type2.neighArr, degree * sizeof(Neigh)); + globalAllocator.freePow2(etype.type2.neighArr, + degree * sizeof(Neigh)); + etype.type2.neighArr = newPtr; + } else if (degree == TH1) { // Type 2 => Type 3 + capacity = capacity + BLOCK_SIZE; + new (&etype.type3.blockList) vector(); + etype.type3.blockList.push_back(etype.type2.neighArr); + assert(etype.type2.neighArr[0].node >= 0); + Neigh *newPtr = (Neigh*) globalAllocator.allocPow2( + BLOCK_SIZE * sizeof(Neigh)); + etype.type3.blockList.push_back(newPtr); + etype.type3.mapCapacity = 0; + //Grow hash table if needed + rebuildHashTable(degree, capacity); + } else { // Type 3 => Type 3 + capacity = capacity + BLOCK_SIZE; + Neigh *newPtr = (Neigh*) globalAllocator.allocPow2( + BLOCK_SIZE * sizeof(Neigh)); + etype.type3.blockList.push_back(newPtr); + //Grow hash table if needed + rebuildHashTable(degree, capacity); + } + } + + Neigh *__restrict currNeighArr; + + if (capacity <= TH0) { currNeighArr = etype.type1.neigh; - } - else{ - currNeighArr = etype.type2_3.neighArr; + } else if (capacity <= TH1) { + currNeighArr = etype.type2.neighArr; } //search and insert if not found - if(capacity <= TH1){ + if (capacity <= TH1) { //Type 1 or 2, do linear search - for(u64 i = 0; i < degree; i++){ - if(currNeighArr[i].node == dstId){ + for (u64 i = 0; i < degree; i++) { + if (currNeighArr[i].node == dstId) { //found same edge, just update currNeighArr[i].setWeight(weight); + assert(false); return; } } - } - else{ + //not found, insert + currNeighArr[degree].node = dstId; + currNeighArr[degree].setWeight(weight); + } else { //type 3, use hash table + adj list - u32 idx = dstId & (capacity * 2 - 1); - DstLocPair* __restrict locMap = etype.type2_3.mapArr; - DstLocPair* __restrict insLoc = nullptr; + u32 idx = dstId & (etype.type3.mapCapacity - 1); + DstLocPair *__restrict locMap = etype.type3.mapArr; + DstLocPair *__restrict insLoc = nullptr; //probe = 0; - while(true){ + while (true) { //probe++; - if(locMap[idx].dst == FLAG_EMPTY_SLOT){ + if (locMap[idx].dst == FLAG_EMPTY_SLOT) { //edge not found, insert - if(insLoc){ - locMap = insLoc; //points to the first tomb stone found + if (insLoc) { + locMap = insLoc; //points to the first tomb stone found } locMap[idx].dst = dstId; locMap[idx].loc = degree; break; - } - else if((locMap[idx].dst == FLAG_TOMB_STONE) && (insLoc == nullptr)){ + } else if ((locMap[idx].dst == FLAG_TOMB_STONE) + && (insLoc == nullptr)) { insLoc = locMap + idx; - } - else if(locMap[idx].dst == dstId){ + } else if (locMap[idx].dst == dstId) { //edge found, update weight - currNeighArr[locMap[idx].loc].setWeight(weight); + u32 blockId = locMap[idx].loc / BLOCK_SIZE; + u32 blockOffset = locMap[idx].loc % BLOCK_SIZE; + assert( + etype.type3.blockList[blockId][blockOffset].node + == dstId); + etype.type3.blockList[blockId][blockOffset].setWeight( + weight); //probingDist[probe]++; + assert(false); return; } //move on idx++; - if(idx == (capacity * 2)){ + if (idx == (etype.type3.mapCapacity)) { idx = 0; } } + //not found, insert + u32 blockId = degree / BLOCK_SIZE; + u32 blockOffset = degree % BLOCK_SIZE; + etype.type3.blockList[blockId][blockOffset].node = dstId; + etype.type3.blockList[blockId][blockOffset].setWeight(weight); + assert(etype.type3.blockList[blockId][blockOffset].node >= 0); + assert( + etype.type3.blockList[etype.type3.blockList.size() - 1][0].node + >= 0); } - //not found, insert - currNeighArr[degree].node = dstId; - currNeighArr[degree].setWeight(weight); degree++; edgeCnt++; } - - void deleteEdge(const Idx dstId, u64& edgeCnt){ - Neigh* __restrict currNeighArr; - Neigh* __restrict nn = nullptr; - - if(capacity <= TH0){ - currNeighArr = etype.type1.neigh; - } - else{ - currNeighArr = etype.type2_3.neighArr; - } - + void deleteEdge(const Idx dstId, u64 &edgeCnt) { + assert(dstId >= 0); //search - if(capacity <= TH1){ - //Type 1 or 2, do linear search - for(u64 i = 0; i < degree; i++){ - if(currNeighArr[i].node == dstId){ - nn = currNeighArr + i; - break; + if (capacity <= TH0) { //Type 1 + Neigh *__restrict currNeighArr = etype.type1.neigh; + for (u64 i = 0; i < degree; i++) { + if (currNeighArr[i].node == dstId) { + //edge found, delete + degree--; + edgeCnt--; + currNeighArr[i] = currNeighArr[degree]; + return; } } - if(__builtin_expect(nn != nullptr, true)){ - //edge found, delete - degree--; - edgeCnt--; - nn->node = currNeighArr[degree].node; - nn->setWeight(currNeighArr[degree].getWeight()); - } - else{ - //edge not found, nothing to do - return; - } } - else{ + if (capacity <= TH1) { //Type2 + Neigh *__restrict currNeighArr = etype.type2.neighArr; + for (u64 i = 0; i < degree; i++) { + if (currNeighArr[i].node == dstId) { + //edge found, delete + degree--; + edgeCnt--; + currNeighArr[i] = currNeighArr[degree]; + + if (degree * 4 <= capacity) { + //reduce capacity + u64 newCap = capacity / 2; + if (newCap <= TH0) { + //T2 => T1 + memcpy(etype.type1.neigh, currNeighArr, + degree * sizeof(Neigh)); + globalAllocator.freePow2(currNeighArr, + capacity * sizeof(Neigh)); + capacity = TH0; + } else { + //T2 => T2 + etype.type2.neighArr = + (Neigh*) globalAllocator.allocPow2( + newCap * sizeof(Neigh)); + memcpy(etype.type2.neighArr, currNeighArr, + degree * sizeof(Neigh)); + globalAllocator.freePow2(currNeighArr, + capacity * sizeof(Neigh)); + capacity = newCap; + } + } + return; + } + } + } else { //Type 3 //using hashed mode - u32 idx = dstId & (capacity * 2 - 1); - DstLocPair* __restrict locMap = etype.type2_3.mapArr; - while(true){ - if(locMap[idx].dst == dstId){ + u32 idx = dstId & (etype.type3.mapCapacity - 1); + DstLocPair *__restrict locMap = etype.type3.mapArr; + while (true) { + if (locMap[idx].dst == dstId) { //edge found, delete degree--; edgeCnt--; //delSucc++; - locMap[idx].dst = FLAG_TOMB_STONE; //invalidate previous hash-table entry + locMap[idx].dst = FLAG_TOMB_STONE; //invalidate previous hash-table entry const u32 loc = locMap[idx].loc; - if(__builtin_expect(loc != degree, true)){ //nothing to do if last entry is removed - const u32 node = currNeighArr[degree].node; + if (__builtin_expect(loc != degree, true)) { //nothing to do if last entry is removed + u32 currBlockId = loc / BLOCK_SIZE; + u32 currBlockOffset = loc % BLOCK_SIZE; + u32 lastBlockId = degree / BLOCK_SIZE; + u32 lastBlockOffset = degree % BLOCK_SIZE; + + const u32 node = + etype.type3.blockList[lastBlockId][lastBlockOffset].node; //copy last entry - currNeighArr[loc] = currNeighArr[degree]; + etype.type3.blockList[currBlockId][currBlockOffset] = + etype.type3.blockList[lastBlockId][lastBlockOffset]; //point to correct location of the swapped entry - u32 idxMoved = node & (capacity * 2 - 1); - while(locMap[idxMoved].dst != node){ + u32 idxMoved = node & (etype.type3.mapCapacity - 1); + while (locMap[idxMoved].dst != node) { idxMoved++; - if(idxMoved == (capacity * 2)){ + if (idxMoved == (etype.type3.mapCapacity)) { idxMoved = 0; } } locMap[idxMoved].loc = loc; } - break; - } - else if (locMap[idx].dst == FLAG_EMPTY_SLOT) { + + //free block if needed + if (degree % BLOCK_SIZE == 0) { + assert(degree == capacity - BLOCK_SIZE); + globalAllocator.freePow2(etype.type3.blockList.back(), + BLOCK_SIZE * sizeof(Neigh)); + etype.type3.blockList.pop_back(); + capacity = capacity - BLOCK_SIZE; + + //check if type switch or rehash is necessary + if (capacity <= TH1) { //T3 => T2 + globalAllocator.freePow2(etype.type3.mapArr, + etype.type3.mapCapacity * sizeof(Neigh)); + etype.type2.neighArr = etype.type3.blockList[0]; + } else { //T3 => T3 + //rehash if needed + rebuildHashTable(capacity + BLOCK_SIZE, capacity); + } + } + + return; + } else if (locMap[idx].dst == FLAG_EMPTY_SLOT) { //edge not found, return return; } //move on idx++; - if(idx == (capacity * 2)){ + if (idx == (etype.type3.mapCapacity)) { idx = 0; } } } - - if((capacity > TH0) && ((degree * 4) <= capacity)){ - //time to reduce capacity - const u64 oldCap = capacity; - const u64 newCap = capacity / 2; - capacity = newCap; - - Neigh* __restrict oldPtr = etype.type2_3.neighArr; - Neigh* __restrict newPtr; - - if(newCap <= TH0){ - //moving from type 2 or 3 to type 1 - newPtr = etype.type1.neigh; - capacity = TH0; - } - else{ - etype.type2_3.neighArr = (Neigh*)globalAllocator.allocPow2(newCap * sizeof(Neigh)); - newPtr = etype.type2_3.neighArr; - } - - //copy old adjList and free - memcpy(newPtr, oldPtr, degree * sizeof(Neigh)); - globalAllocator.freePow2(oldPtr, oldCap * sizeof(Neigh)); - - //shrink or delete hash table if needed - rebuildHashTable(oldCap, newCap); - } } }; - -template -class Vertex{ +template +class Vertex { public: - EdgeArray inEdges; - EdgeArray outEdges; + EdgeArray inEdges; + EdgeArray outEdges; }; - #endif - #if defined(USE_GT_BALANCED_TYPE3_ONLY) #define FLAG_EMPTY_SLOT 0xFFFFFFFFU @@ -530,7 +593,6 @@ class Vertex{ #endif - #ifdef USE_GT_BALANCED_STDMAP #define CACHE_LINE_SIZE 64 @@ -721,8 +783,6 @@ class Vertex{ #endif - - #if defined(USE_GT_BALANCED_MALLOC_STDMAP) || defined(USE_GT_BALANCED_RHH) #define CACHE_LINE_SIZE 64 @@ -913,8 +973,6 @@ class Vertex{ #endif - - #if defined(USE_GT_BALANCED_TSL_RHH) #define CACHE_LINE_SIZE 64 @@ -1106,9 +1164,6 @@ class Vertex{ #endif - - - #ifdef USE_GT_BALANCED_ABSEIL #define CACHE_LINE_SIZE 64 @@ -1299,8 +1354,6 @@ class Vertex{ #endif - - #ifdef USE_GT_BALANCED_MALLOC #define FLAG_EMPTY_SLOT 0xFFFFFFFFU @@ -1566,8 +1619,6 @@ class Vertex{ #endif - - #ifdef USE_GT_UPDATE #define FLAG_EMPTY_SLOT 0xFFFFFFFFU @@ -1832,9 +1883,7 @@ class alignas(CACHE_LINE_SIZE) EdgeArray{ degree--; edgeCnt--; nn->node = currNeighArr[degree].node; - nn->setWeight(currNeighArr[degree].getWeight()); - } - else{ + //edge not found, nothing to do return; } @@ -1921,7 +1970,6 @@ class Vertex{ #endif - #ifdef USE_HYBRID_HASHMAP_WITH_GROUPING_AND_EDGE_ARR_LOCKING template @@ -1944,7 +1992,6 @@ class Vertex{ #endif - #ifdef USE_SORTED_EDGES template @@ -1966,7 +2013,6 @@ class Vertex{ #endif - #ifdef USE_CAHCE_FRIENDLY_HASH #include "GraphTangoHash.h" @@ -1980,7 +2026,6 @@ class Vertex{ #endif - #ifdef USE_CAHCE_FRIENDLY_HASH_ONLY #include "GraphTangoHash.h" @@ -1993,4 +2038,3 @@ class Vertex{ }; #endif - diff --git a/src/dynamic/abstract_data_struc.h b/src/dynamic/abstract_data_struc.h index 475005e..693f55c 100644 --- a/src/dynamic/abstract_data_struc.h +++ b/src/dynamic/abstract_data_struc.h @@ -16,10 +16,6 @@ class dataStruc { int64_t num_nodes = 0; int64_t num_edges = 0; -#ifdef CALC_TYPE_SWITCH - uint64_t switchCnt = 0; -#endif - bool weighted; bool directed; std::vector property; diff --git a/src/dynamic/common.h b/src/dynamic/common.h index 4b77c8c..436eabe 100644 --- a/src/dynamic/common.h +++ b/src/dynamic/common.h @@ -21,6 +21,9 @@ typedef int8_t i8; typedef I64 Idx; #define MIN_RET_VAL 2 +#ifndef _OPENMP +#define _OPENMP +#endif //#define LIKWID_PERFMON @@ -38,6 +41,8 @@ typedef I64 Idx; #define LIKWID_MARKER_GET(regionTag, nevents, events, time, count) #endif +//#define ENABLE_PROFILING + //#define CALC_TYPE_SWITCH //#define USE_CFH_FOR_DAH //#define CALC_MEM_PER_EDGE @@ -45,10 +50,19 @@ typedef I64 Idx; //#define CALC_EDGE_TOUCHED //#define USE_HUGEPAGE +#ifdef ENABLE_PROFILING +#define CALC_TYPE_SWITCH +#define CALC_MEM_PER_EDGE +//#define CALC_EDGE_TOUCHED /* do it later as it requires running the algo */ +#define CALC_STATIC_TYPE_MAPPING +#define CALC_DYNNAMIC_TYPE_MAPPING +#endif + //define only one of the following //#define USE_HYBRID_HASHMAP //#define USE_HYBRID_HASHMAP_WITH_CFH -#define USE_GT_BALANCED +#define USE_GT_LOAD_BALANCED +//#define USE_GT_BALANCED //#define USE_GT_BALANCED_TYPE3_ONLY //#define USE_GT_BALANCED_MALLOC //#define USE_GT_BALANCED_STDMAP @@ -82,14 +96,19 @@ typedef I64 Idx; || defined(USE_GT_BALANCED_DYN_PARTITION) \ || defined(USE_GT_BALANCED_ABSEIL) \ || defined(USE_GT_BALANCED_RHH) \ - || defined(USE_GT_BALANCED_TSL_RHH) -#define HYBRID_HASH_PARTITION 32UL + || defined(USE_GT_BALANCED_TSL_RHH) \ + || defined(USE_GT_LOAD_BALANCED) +#define HYBRID_HASH_PARTITION 64UL #endif #ifdef USE_SORTED_EDGES #define LINEAR_BUFF_SIZE 512UL #endif +#ifdef USE_GT_LOAD_BALANCED +#define LB_NUMBER_OF_BUCKETS 128UL +#endif + typedef struct { u32 dst; u32 loc; @@ -101,6 +120,10 @@ typedef enum { VTYPE_3 } VType; +constexpr bool isPowOf2(u64 num){ + return !((num - 1) & num); +} + // Log2 for power of 2 integers //#define LOG2(x) __builtin_ctzl(x) constexpr U64 getPow2Log2(U64 val) { diff --git a/src/dynamic/fileReader.h b/src/dynamic/fileReader.h index 5b5c4cf..d2ba591 100644 --- a/src/dynamic/fileReader.h +++ b/src/dynamic/fileReader.h @@ -26,11 +26,12 @@ Edge convertCSVLineIntoEdge(const char delim, const string& line, bool weighted) getline(ss, data, delim); e.source = stol(data); getline(ss, data, delim); e.destination = stol(data); - getline(ss, data, delim); /*time = stol(data);*/ + //getline(ss, data, delim); /*time = stol(data);*/ if(weighted){ - getline(ss, data, delim); - e.weight = stol(data); + //getline(ss, data, delim); + //e.weight = stol(data); + e.weight = (rand() % 8) + 8; } if(line[0] == '-'){ @@ -75,7 +76,7 @@ void readBatchFromCSV(EdgeList& el, ifstream& in, int batchSize, int batch_id, b while(getline(in, line)){ if(line != ""){ - Edge e = convertCSVLineIntoEdge(',', line, weighted); + Edge e = convertCSVLineIntoEdge(' ', line, weighted); if(assignLogicalID(e.source, VMap, lastAssignedLogicalID)) e.sourceExists = true; if(assignLogicalID(e.destination, VMap, lastAssignedLogicalID)) e.destExists = true; //e.batch_id = batch_id; diff --git a/src/dynamic/frontEnd.cc b/src/dynamic/frontEnd.cc index aefffb5..e4fb14d 100644 --- a/src/dynamic/frontEnd.cc +++ b/src/dynamic/frontEnd.cc @@ -12,7 +12,6 @@ #include "parser.h" #include "../common/timer.h" - using namespace std; /* Main thread that launches everything else */ @@ -33,70 +32,68 @@ int main(int argc, char *argv[]) { el.reserve(opts.batch_size); Timer t; - dataStruc *ds = createDataStruc(opts.type, opts.weighted, opts.directed, opts.num_nodes, opts.num_threads); + dataStruc *ds = createDataStruc(opts.type, opts.weighted, opts.directed, + opts.num_nodes, opts.num_threads); Algorithm alg(opts.algorithm, ds, opts.type); ofstream updF("Update.csv"); while (!file.eof()) { - readBatchFromCSV(el, file, opts.batch_size, batch_id, opts.weighted, VMAP, lastAssignedNodeID); + readBatchFromCSV(el, file, opts.batch_size, batch_id, opts.weighted, + VMAP, lastAssignedNodeID); t.Start(); ds->update(el); t.Stop(); updF << t.Seconds() << endl; - cout << "Inserted Batch " << batch_id << ": Nodes " << ds->num_nodes << ", Edges " << ds->num_edges << endl; - - alg.performAlg(); + cout << "Inserted Batch " << batch_id << ": Nodes " << ds->num_nodes + << ", Edges " << ds->num_edges << endl; batch_id++; } - updF.close(); - - -// while (!file.eof()) { -// readBatchFromCSV(el, file, opts.batch_size, batch_id, opts.weighted, VMAP, lastAssignedNodeID); -// ds->update(el); -// cout << "Inserted Batch " << batch_id << ": Nodes " << ds->num_nodes << ", Edges " << ds->num_edges << endl; -// //cout << "ins," << ((ds->num_edges * 1.0) / ds->num_nodes) << endl; -// batch_id++; -// } -// -// file.close(); - -// stringstream ss; -// ss << opts.filename << ".del"; -// file.open(ss.str()); -// if (!file.is_open()) { -// cout << "Couldn't open file " << ss.str() << endl; -// exit(-1); -// } -// -// batch_id = 0; -// while (!file.eof()) { -// readBatchFromCSV(el, file, opts.batch_size, batch_id, opts.weighted, VMAP, lastAssignedNodeID); -// -// t.Start(); -// ds->update(el); -// t.Stop(); -// -// updF << t.Seconds() << endl; -// //cout << "del," << ((ds->num_edges * 1.0) / ds->num_nodes) << endl; -// cout << "Deleted Batch " << batch_id << ": Nodes " << ds->num_nodes << ", Edges " << ds->num_edges << endl; -// -// alg.performAlg(); -// -// batch_id++; -// } -// updF.close(); + file.close(); + +#ifndef ENABLE_PROFILING + alg.performAlg(); +#endif + if (opts.enDeleteEdges) { + ofstream updD("delUpdate.csv"); + stringstream ss; + ss << opts.filename << ".del"; + file.open(ss.str()); + if (!file.is_open()) { + cout << "Couldn't open file for delete" << ss.str() << endl; + exit(-1); + } + + batch_id = 0; + el.clear(); + while (!file.eof()) { + readBatchFromCSV(el, file, opts.batch_size, batch_id, opts.weighted, + VMAP, lastAssignedNodeID); + + t.Start(); + ds->update(el); + t.Stop(); + + updD << t.Seconds() << endl; + cout << "Deleted Batch " << batch_id << ": Nodes " << ds->num_nodes + << ", Edges " << ds->num_edges << endl; + + batch_id++; + } + file.close(); + updD.close(); + } ds->print(); + if (ds) { + delete ds; + } + #ifdef CALC_EDGE_TOUCHED cout << "EDGES TOUCHED: " << g_edge_touched << endl; #endif -#ifdef CALC_TYPE_SWITCH - cout << "Switch count: " << ds->switchCnt << endl; -#endif } diff --git a/src/dynamic/parser.cc b/src/dynamic/parser.cc index ced3352..e01d7d2 100644 --- a/src/dynamic/parser.cc +++ b/src/dynamic/parser.cc @@ -50,6 +50,7 @@ void printUsage() << "-n max number of nodes to initialize with\n" << "-a algorithm algorithm to run (default: bfsdyn)\n" << "-t number of threads (default: 16)\n" + << "-r enable edge deletion 0=disable 1=enable\n" << " DATA STRUCTURE OPTIONS:\n" << " 1) adList (single-threaded) \n" << " 2) adListShared (multihtreaded shared style) \n" @@ -78,7 +79,7 @@ cmd_args parse(int argc, char *argv[]) { cmd_args args; int opt = 0; - while(-1 != (opt = getopt(argc, argv, "f:b:w:d:s:n:a:t:h"))) { + while(-1 != (opt = getopt(argc, argv, "f:b:w:d:s:n:a:t:h:r"))) { switch(opt) { case 'f': // if (getSuffix(optarg) != ".csv") { @@ -130,6 +131,17 @@ cmd_args parse(int argc, char *argv[]) case 't': args.num_threads = atoi(optarg); break; + case 'r': + if(atoi(optarg) == 1) { + args.enDeleteEdges = true; + } else if (atoi(optarg) == 0) { + args.enDeleteEdges = false; + } else { + std::cerr << "-r only takes 0 or 1" << std::endl; + printUsage(); + exit(-1); + } + break; case 'a': args.algorithm = optarg; if (!supportedAlg(args.algorithm)) { diff --git a/src/dynamic/parser.h b/src/dynamic/parser.h index 3472905..61f5747 100644 --- a/src/dynamic/parser.h +++ b/src/dynamic/parser.h @@ -7,6 +7,7 @@ struct cmd_args { int batch_size = 0; bool directed = false; bool weighted = false; + bool enDeleteEdges = false; int64_t num_nodes = 0; std::string filename; std::string type = "graphTango"; diff --git a/src/dynamic/traversal.h b/src/dynamic/traversal.h index a9e385c..29ea32b 100644 --- a/src/dynamic/traversal.h +++ b/src/dynamic/traversal.h @@ -29,14 +29,19 @@ template class neighborhood_iter> { friend class neighborhood> ; private: - U* cursor; + u32 idx; + U* arr; + U** blocks; + bool isType3; public: - neighborhood_iter(U* _cursor) { - cursor = _cursor; - } + neighborhood_iter(u32 idx, U* arr, U** blocks, bool isType3) : + idx(idx), + arr(arr), + blocks(blocks), + isType3(isType3) { } bool operator!=(const neighborhood_iter> &it) { - return cursor != it.cursor; + return idx != it.idx; } neighborhood_iter& operator++() { @@ -44,8 +49,11 @@ class neighborhood_iter> { #pragma omp atomic g_edge_touched++; #endif - - cursor++; + idx++; + if(isType3 && !(idx % EdgeArray::BLOCK_SIZE)){ + blocks++; + arr = *blocks; + } return *this; } @@ -54,16 +62,22 @@ class neighborhood_iter> { #pragma omp atomic g_edge_touched++; #endif - cursor++; + idx++; + if(isType3 && !(idx % EdgeArray::BLOCK_SIZE)){ + blocks++; + arr = *blocks; + } return *this; } - NodeID operator*() { - return cursor->getNodeID(); + NodeID operator*() const { + u32 offset = idx % EdgeArray::BLOCK_SIZE; + return arr[offset].getNodeID(); } - Weight extractWeight() { - return cursor->getWeight(); + Weight extractWeight() const { + u32 offset = idx % EdgeArray::BLOCK_SIZE; + return arr[offset].getWeight(); } }; @@ -782,39 +796,55 @@ class neighborhood> { || defined(USE_GT_BALANCED_DYN_PARTITION) \ || defined(USE_GT_BALANCED_ABSEIL) \ || defined(USE_GT_BALANCED_RHH) \ - || defined(USE_GT_BALANCED_TSL_RHH) + || defined(USE_GT_BALANCED_TSL_RHH) \ + || defined(USE_GT_LOAD_BALANCED) template class neighborhood> { private: - U* _start; - uint64_t _size; + + uint64_t degree; + U* arr = nullptr; + U** blocks = nullptr; + bool isType3 = false; + + public: neighborhood(NodeID _node, GraphTango *_ds, bool _in_neigh) { if(_in_neigh){ if(_ds->vArray[_node].inEdges.capacity <= EdgeArray::TH0){ - _start = _ds->vArray[_node].inEdges.etype.type1.neigh; + arr = _ds->vArray[_node].inEdges.etype.type1.neigh; + } + else if(_ds->vArray[_node].inEdges.capacity <= EdgeArray::TH1){ + arr = _ds->vArray[_node].inEdges.etype.type2.neighArr; } else{ - _start = _ds->vArray[_node].inEdges.etype.type2_3.neighArr; + arr = _ds->vArray[_node].inEdges.etype.type3.blockList[0]; + blocks = _ds->vArray[_node].inEdges.etype.type3.blockList.data(); + isType3 = true; } - _size = _ds->vArray[_node].inEdges.degree; + degree = _ds->vArray[_node].inEdges.degree; } else{ if(_ds->vArray[_node].outEdges.capacity <= EdgeArray::TH0){ - _start = _ds->vArray[_node].outEdges.etype.type1.neigh; + arr = _ds->vArray[_node].outEdges.etype.type1.neigh; + } + else if(_ds->vArray[_node].outEdges.capacity <= EdgeArray::TH1){ + arr = _ds->vArray[_node].outEdges.etype.type2.neighArr; } else{ - _start = _ds->vArray[_node].outEdges.etype.type2_3.neighArr; + arr = _ds->vArray[_node].outEdges.etype.type3.blockList[0]; + blocks = _ds->vArray[_node].outEdges.etype.type3.blockList.data(); + isType3 = true; } - _size = _ds->vArray[_node].outEdges.degree; + degree = _ds->vArray[_node].outEdges.degree; } } - neighborhood_iter> begin() { - return neighborhood_iter>(_start); + neighborhood_iter> begin() const { + return neighborhood_iter>(0, arr, blocks, isType3); } - neighborhood_iter> end() { - return neighborhood_iter>(_start + _size); + neighborhood_iter> end() const { + return neighborhood_iter>(degree, arr, blocks, isType3); } };