diff --git a/.gitignore b/.gitignore
index ccbd28d..8ed0b24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 .settings/
 .project
 .cproject
+.vscode/
 bin/
 obj/
 inputResource/*.csv
diff --git a/profile.sh b/profile.sh
index cde1395..3534615 100755
--- a/profile.sh
+++ b/profile.sh
@@ -11,7 +11,7 @@ export OMP_PROC_BIND=close
 #export OMP_PLACES={2}:64:1
 export OMP_PLACES=threads
 
-dataDir=./inputResource/
+dataDir=../
 #STRUCTURES=(adListChunked adListShared degAwareRHH stinger)
 #STRUCTURES=(graphite adListChunked adListShared degAwareRHH stinger)
 STRUCTURES=(graphTango)
@@ -32,10 +32,8 @@ ALGORITHMS=(
 # Max num_nodes to initialize for each dataset
 declare -A DATASETS
 DATASETS=(
-       [LiveJournal.csv]=4847571       
-       [orkut.csv]=3072441       
-       [wiki-topcats.csv]=1791489
-       [WikiTalk.csv]=2394385
+		[orkut.el]=3072441
+#    [twitter.el]=61578415   
 #		[rmat_1_1.csv]=1048576
 #		[rmat_1_2.csv]=1048576
 #		[rmat_1_4.csv]=1048576
diff --git a/src/dynamic/GraphTango.h b/src/dynamic/GraphTango.h
index 2feb6e2..7263df7 100644
--- a/src/dynamic/GraphTango.h
+++ b/src/dynamic/GraphTango.h
@@ -43,27 +43,409 @@ class GraphTango : public dataStruc {
 #ifdef CALC_MEM_PER_EDGE
 		cout << "Total memory req: " << globalAllocator.totMem << endl;
 #endif
+	}
+
+#if defined(USE_GT_LOAD_BALANCED)
 
+	Vertex<Neigh>* vArray;	// per-vertex inEdges/outEdges storage; allocated through globalAllocator in the constructor
+
+	//VertexArray<Neigh> vArray;
+	const int num_threads;	// thread count given to the constructor; bounds the per-thread loops over thInfo
+
+#if defined(CALC_TYPE_SWITCH) || defined(CALC_DYNNAMIC_TYPE_MAPPING)
+	typedef struct{	// per-thread counters, statistics build. NOTE(review): no inBuckets/outBuckets here, yet the constructor and update() of this variant use them unconditionally - confirm these macros are meant to combine with USE_GT_LOAD_BALANCED
+		u64 edgeCnt = 0;	// edge-count accumulator handed to insertEdge/deleteEdge; folded into num_edges at the end of update()
+		u64 nodeCnt = 0;	// new-node count; folded into num_nodes at the end of update()
+		u64 switchCnt = 0;	// summed and printed by the destructor under CALC_TYPE_SWITCH
+		u64 type1 = 0;	// type1..type3 are summed and printed by the destructor under CALC_DYNNAMIC_TYPE_MAPPING
+		u64 type2 = 0;
+		u64 type3 = 0;
+		u8  pad[16];	// presumably rounds the struct up to 64 bytes (6 * 8 + 16, assuming u64 is 8 bytes) - TODO confirm
+	} ThreadInfo;
+#else
+	typedef struct{	// per-thread state used by the bucketed update()
+		u64 edgeCnt = 0;	// edge-count accumulator handed to insertEdge/deleteEdge; folded into num_edges at the end of update()
+		u64 nodeCnt = 0;	// new-node count; folded into num_nodes at the end of update()
+		vector<vector<Edge>> inBuckets;	// edges this thread scattered, indexed by (destination / 64) % LB_NUMBER_OF_BUCKETS
+		vector<vector<Edge>> outBuckets;	// edges this thread scattered, indexed by (source / 64) % LB_NUMBER_OF_BUCKETS
+	} ThreadInfo;
+#endif
+
+	alignas(64) ThreadInfo thInfo[32];	// fixed-size table of per-thread state, indexed by omp_get_thread_num() in update()
+
+	GraphTango(bool weighted, bool directed, i64 numNodes, i64 numThreads) : dataStruc(weighted, directed), num_threads(numThreads){
+		// Builds an empty graph: numNodes vertex slots plus bucket vectors for numThreads workers.
+		// weighted/directed are forwarded to dataStruc; numThreads is also handed to OpenMP when available.
+		// thInfo is a fixed-size array, so reject thread counts that would index outside of it.
+		assert(numThreads > 0 && numThreads <= (i64)(sizeof(thInfo) / sizeof(thInfo[0])));
+#ifdef _OPENMP
+		if(numThreads > 0){
+			omp_set_num_threads(numThreads);
+		}
+#endif
+		vArray = (Vertex<Neigh>*)globalAllocator.allocate(sizeof(Vertex<Neigh>) * numNodes);
+		#pragma omp parallel for
+		for(u64 i = 0; i < numNodes; i++){
+			vArray[i].inEdges.degree = 0;
+			vArray[i].inEdges.capacity = EdgeArray<Neigh>::TH0;
+			vArray[i].outEdges.degree = 0;
+			vArray[i].outEdges.capacity = EdgeArray<Neigh>::TH0;
+		}
+
+		#pragma omp parallel for
+		for(i32 i = 0; i < numThreads; i++){
+			thInfo[i].inBuckets.resize(LB_NUMBER_OF_BUCKETS);
+			thInfo[i].outBuckets.resize(LB_NUMBER_OF_BUCKETS);
+		}
+
+		cout << "TH0: " << EdgeArray<Neigh>::TH0 << endl;
+		cout << "TH1: " << EdgeArray<Neigh>::TH1 << endl;
+		cout << "Sizeof ThreadInfo: " << sizeof(ThreadInfo) << endl;
+		cout << "Sizeof EdgeArray: " << sizeof(EdgeArray<Neigh>) << endl;
+		cout << "Sizeof Vertex: " << sizeof(Vertex<Neigh>) << endl;
+#ifdef _OPENMP
+		cout << "Max threads: " << omp_get_max_threads() << endl;
+#endif
+		property.resize(numNodes, -1);
+		affected.resize(numNodes);
+		affected.fill(false);
+	}
+
+	virtual ~GraphTango(){	// prints the optional statistics selected by the CALC_* macros; does nothing when none is defined
+#ifdef CALC_STATIC_TYPE_MAPPING
+		u64 type1 = 0;	// classify every vertex twice (in-degree and out-degree) against the TH0/TH1 thresholds
+		u64 type2 = 0;
+		u64 type3 = 0;
+		for(u64 i = 0; i < num_nodes; i++){
+			u64 numNeigh = in_degree(i);
+			if(numNeigh <= EdgeArray<Neigh>::TH0){
+				type1++;
+			}
+			else if(numNeigh <= EdgeArray<Neigh>::TH1){
+				type2++;
+			}
+			else{
+				type3++;
+			}
+			numNeigh = out_degree(i);
+			if(numNeigh <= EdgeArray<Neigh>::TH0){
+				type1++;
+			}
+			else if(numNeigh <= EdgeArray<Neigh>::TH1){
+				type2++;
+			}
+			else{
+				type3++;
+			}
+		}
+		cout << "Static type mapping: \n\tType1: " << type1 << "\t\tType2: " << type2 << "\t\tType3: " << type3 << endl;
+#endif
+#ifdef CALC_DYNNAMIC_TYPE_MAPPING
+		u64 dynType1 = 0;	// totals of the per-thread type counters kept in thInfo
+		u64 dynType2 = 0;
+		u64 dynType3 = 0;
+		for(u64 i = 0; i < num_threads; i++){
+			dynType1 += thInfo[i].type1;
+			dynType2 += thInfo[i].type2;
+			dynType3 += thInfo[i].type3;
+		}
+		cout << "Dynamic type mapping: \n\tType1: " << dynType1 << "\t\tType2: " << dynType2 << "\t\tType3: " << dynType3 << endl;
+#endif
+#ifdef CALC_TYPE_SWITCH
+		u64 switchCnt = 0;	// same width as the per-thread u64 counters, so the total cannot be truncated
+		for(u64 i = 0; i < num_threads; i++){
+			switchCnt += thInfo[i].switchCnt;
+		}
+		cout << "Switch Count: " << switchCnt << endl;
+#endif
+	}
+
+	int64_t in_degree(NodeID n) override {	// number of in-edges currently stored for vertex n; n is not range-checked
+		return vArray[n].inEdges.degree;
+	}
+
+	int64_t out_degree(NodeID n) override {	// number of out-edges currently stored for vertex n; n is not range-checked
+		return vArray[n].outEdges.degree;
+	}
+
+	void update(const EdgeList& el) override {
+		//Applies one batch of edge insertions/deletions (el) to the in- and out-edge arrays of the touched vertices.
+		const u64 batchSize = el.size();
+
+#ifdef _OPENMP
+		//three phases: clear the per-thread buckets, scatter the batch into them, then drain bucket by bucket
+
+		//clear the buckets
+		#pragma omp parallel for
+		for(i32 i = 0; i < num_threads; i++){
+			ThreadInfo& th = thInfo[i];
+			for(u32 j = 0; j < LB_NUMBER_OF_BUCKETS; j++){
+				th.inBuckets[j].clear();
+				th.outBuckets[j].clear();
+			}
+		}
+
+		//distribute edges to the buckets
+		#pragma omp parallel
+		{
+			const i64 actualTh = omp_get_thread_num();
+			ThreadInfo& th = thInfo[actualTh];
+			const u64 elemPerTh = (batchSize + omp_get_num_threads() - 1) / omp_get_num_threads();	//ceil over the threads actually granted; sizing by num_threads would skip edges when the team is smaller
+			const u64 startIdx = actualTh * elemPerTh;
+			const u64 endIdx = (startIdx + elemPerTh) > batchSize ? batchSize : (startIdx + elemPerTh);
+			for(u64 i = startIdx; i < endIdx; i++){
+				const i64 src = el[i].source;
+				const i64 dst = el[i].destination;
+				if(!el[i].sourceExists){
+					th.nodeCnt++;
+				}
+				if(!el[i].destExists){
+					th.nodeCnt++;
+				}
+				const u64 srcBucketIdx = (src / 64) % LB_NUMBER_OF_BUCKETS;
+				const u64 dstBucketIdx = (dst / 64) % LB_NUMBER_OF_BUCKETS;
+
+				th.inBuckets[dstBucketIdx].push_back(el[i]);
+				th.outBuckets[srcBucketIdx].push_back(el[i]);
+			}
+		}
+
+		//shared ticket counter: ids [0, B) select in-buckets, ids [B, 2B) select out-buckets (B = LB_NUMBER_OF_BUCKETS)
+		u64 nextBucketIdx = 0;
+
+		//process the buckets
+		#pragma omp parallel
+		{
+			const i64 actualTh = omp_get_thread_num();
+			while(true){
+				u64 currBucketId;
+				#pragma omp atomic capture
+				currBucketId = nextBucketIdx++;
+				if(currBucketId < LB_NUMBER_OF_BUCKETS){
+					u64 garbage = 0;	//in-edge updates must not touch the real edge counter; initialised in case insertEdge/deleteEdge read-modify-write it
+					//in bucket
+					for(u32 thId = 0; thId < num_threads; thId++){
+						const vector<Edge>& bucket = thInfo[thId].inBuckets[currBucketId];
+						for(const Edge& e : bucket){
+							const i64 src = e.source;
+							const i64 dst = e.destination;
+							if(!affected[dst]){
+								affected[dst] = true;
+							}
+							if(!e.isDelete){	//insertion to inEdges
+								vArray[dst].inEdges.insertEdge(src, e.weight, garbage);
+							}
+							else{				//deletion from inEdges
+								vArray[dst].inEdges.deleteEdge(src, garbage);
+							}
+						}
+					}
+				}
+				else if(currBucketId < LB_NUMBER_OF_BUCKETS * 2){
+					//out bucket
+					currBucketId = currBucketId - LB_NUMBER_OF_BUCKETS;
+					for(u32 thId = 0; thId < num_threads; thId++){
+						const vector<Edge>& bucket = thInfo[thId].outBuckets[currBucketId];
+						for(const Edge& e : bucket){
+							const i64 src = e.source;
+							const i64 dst = e.destination;
+							if(!affected[src]){
+								affected[src] = true;
+							}
+							if(!e.isDelete){	//insertion to outEdges
+								vArray[src].outEdges.insertEdge(dst, e.weight, thInfo[actualTh].edgeCnt);
+							}
+							else{				//deletion from outEdges
+								vArray[src].outEdges.deleteEdge(dst, thInfo[actualTh].edgeCnt);
+							}
+						}
+					}
+				}
+				else{
+					break;
+				}
+			}
+		}
 
-//		std::cout << "Inserts--------------------" << std::endl;
-//		std::cout << "    Total: " << insTot << std::endl;
-//		std::cout << "    Succ : " << insSucc << std::endl;
-//		std::cout << "    Fail : " << insTot - insSucc << std::endl;
-//		std::cout << std::endl;
 //
-//		std::cout << "Deletes--------------------" << std::endl;
-//		std::cout << "    Total: " << delTot << std::endl;
-//		std::cout << "    Succ : " << delSucc << std::endl;
-//		std::cout << "    Fail : " << delTot - delSucc << std::endl;
-//		std::cout << std::endl;
 //
-//		std::cout << "Final number of edges: " << insSucc - delSucc << std::endl;
-//		ofstream out("probing_dist.csv");
-//		for(auto it : probingDist){
-//			out << it.first << "," << it.second << endl;
+//
+//
+//		//int thMask = (1 << getNextPow2Log2(num_threads)) - 1;
+//
+//		#pragma omp parallel
+//		{
+//			const i64 actualTh = omp_get_thread_num();
+//			LIKWID_MARKER_START("upd");
+//			for(u64 i = 0; i < batchSize; i++){
+//				const i64 src = el[i].source;
+//				const i64 dst = el[i].destination;
+//
+//				//i64 targetTh = (src / 64) & thMask;
+//				i64 targetTh = (src / 64) % num_threads;
+//				if(targetTh == actualTh){
+//					if(!el[i].sourceExists){
+//						thInfo[actualTh].nodeCnt++;
+//					}
+//					if(!el[i].destExists){
+//						thInfo[actualTh].nodeCnt++;
+//					}
+//
+//					if(!affected[src]){
+//						affected[src] = true;
+//					}
+//
+//					#ifdef CALC_TYPE_SWITCH
+//					VType initType = VType::VTYPE_3;
+//					if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH0){
+//						initType = VType::VTYPE_1;
+//					}
+//					else if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH1){
+//						initType = VType::VTYPE_2;
+//					}
+//					#endif
+//
+//					if(!el[i].isDelete){
+//						//insert out edge
+//						vArray[src].outEdges.insertEdge(dst, el[i].weight, thInfo[actualTh].edgeCnt);
+//					}
+//					else{
+//						//delete out edge
+//						vArray[src].outEdges.deleteEdge(dst, thInfo[actualTh].edgeCnt);
+//					}
+//
+//					#ifdef CALC_TYPE_SWITCH
+//					VType finType = VType::VTYPE_3;
+//					if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH0){
+//						finType = VType::VTYPE_1;
+//					}
+//					else if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH1){
+//						finType = VType::VTYPE_2;
+//					}
+//					if(initType != finType){
+//						thInfo[actualTh].switchCnt++;
+//					}
+//					#endif
+//
+//					#ifdef CALC_DYNNAMIC_TYPE_MAPPING
+//					if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH0){
+//						thInfo[actualTh].type1++;
+//					}
+//					else if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH1){
+//						thInfo[actualTh].type2++;
+//					}
+//					else{
+//						thInfo[actualTh].type3++;
+//					}
+//					#endif
+//				}
+//
+//				//targetTh = (dst / 64) & thMask;
+//				targetTh = (dst / 64) % num_threads;
+//				if(targetTh == actualTh){
+//					if(!affected[dst]){
+//						affected[dst] = true;
+//					}
+//
+//					#ifdef CALC_TYPE_SWITCH
+//					VType initType = VType::VTYPE_3;
+//					if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH0){
+//						initType = VType::VTYPE_1;
+//					}
+//					else if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH1){
+//						initType = VType::VTYPE_2;
+//					}
+//					#endif
+//
+//					u64 garbage;
+//					if(!el[i].isDelete){
+//						//insert in edge
+//						vArray[dst].inEdges.insertEdge(src, el[i].weight, garbage);
+//					}
+//					else{
+//						//delete in edge
+//						vArray[dst].inEdges.deleteEdge(src, garbage);
+//					}
+//
+//					#ifdef CALC_TYPE_SWITCH
+//					VType finType = VType::VTYPE_3;
+//					if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH0){
+//						finType = VType::VTYPE_1;
+//					}
+//					else if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH1){
+//						finType = VType::VTYPE_2;
+//					}
+//					if(initType != finType){
+//						thInfo[actualTh].switchCnt++;
+//					}
+//					#endif
+//
+//					#ifdef CALC_DYNNAMIC_TYPE_MAPPING
+//					if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH0){
+//						thInfo[actualTh].type1++;
+//					}
+//					else if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH1){
+//						thInfo[actualTh].type2++;
+//					}
+//					else{
+//						thInfo[actualTh].type3++;
+//					}
+//					#endif
+//				}
+//
+//			}
+//			LIKWID_MARKER_STOP("upd");
 //		}
+#else
+		for(u64 i = 0; i < batchSize; i++){
+			const u64 src = el[i].source;
+			const u64 dst = el[i].destination;
+
+			if(!el[i].sourceExists){
+				thInfo[0].nodeCnt++;
+			}
+			if(!el[i].destExists){
+				thInfo[0].nodeCnt++;
+			}
+			//we do not need atomic operation on affected as long as "some" thread updates it
+			if(!affected[src]){
+				affected[src] = true;
+			}
+			if(!affected[dst]){
+				affected[dst] = true;
+			}
+
+			u64 garbage = 0;	//throw-away counter for the in-edge side; initialised in case the callee read-modify-writes it
+			if(!el[i].isDelete){
+				//insertion
+				//insert out edge
+				vArray[src].outEdges.insertEdge(dst, el[i].weight, thInfo[0].edgeCnt);
+
+				//insert in edge
+				vArray[dst].inEdges.insertEdge(src, el[i].weight, garbage);
+			}
+			else{
+				//delete out edge
+				vArray[src].outEdges.deleteEdge(dst, thInfo[0].edgeCnt);
+
+				//delete in edge
+				vArray[dst].inEdges.deleteEdge(src, garbage);
+			}
+		}
+#endif
+
+		for(u64 i = 0; i < num_threads; i++){
+			num_edges += thInfo[i].edgeCnt;
+			thInfo[i].edgeCnt = 0;
+			num_nodes += thInfo[i].nodeCnt;
+			thInfo[i].nodeCnt = 0;
+		}
+		//num_nodes = el[batchSize - 1].lastAssignedId + 1;
 	}
 
+#endif
+
+
 #ifdef USE_HYBRID_HASHMAP
 
 	VertexArray<Neigh> vArray;
@@ -791,12 +1173,15 @@ class GraphTango : public dataStruc {
 	//VertexArray<Neigh> vArray;
 	const int num_threads;
 
-#ifdef CALC_TYPE_SWITCH
+#if defined(CALC_TYPE_SWITCH) || defined(CALC_DYNNAMIC_TYPE_MAPPING)
 	typedef struct{
 		u64 edgeCnt = 0;
 		u64 nodeCnt = 0;
 		u64 switchCnt = 0;
-		u8 pad[40];
+		u64 type1 = 0;
+		u64 type2 = 0;
+		u64 type3 = 0;
+		u8  pad[16];
 	} ThreadInfo;
 #else
 	typedef struct{
@@ -840,7 +1225,46 @@ class GraphTango : public dataStruc {
 		affected.fill(false);
 	}
 
-	~GraphTango(){
+	virtual ~GraphTango(){
+#ifdef CALC_STATIC_TYPE_MAPPING
+		u64 type1 = 0;
+		u64 type2 = 0;
+		u64 type3 = 0;
+		for(u64 i = 0; i < num_nodes; i++){
+			u64 numNeigh = in_degree(i);
+			if(numNeigh <= EdgeArray<Neigh>::TH0){
+				type1++;
+			}
+			else if(numNeigh <= EdgeArray<Neigh>::TH1){
+				type2++;
+			}
+			else{
+				type3++;
+			}
+			numNeigh = out_degree(i);
+			if(numNeigh <= EdgeArray<Neigh>::TH0){
+				type1++;
+			}
+			else if(numNeigh <= EdgeArray<Neigh>::TH1){
+				type2++;
+			}
+			else{
+				type3++;
+			}
+		}
+		cout << "Static type mapping: \n\tType1: " << type1 << "\t\tType2: " << type2 << "\t\tType3: " << type3 << endl;
+#endif
+#ifdef CALC_DYNNAMIC_TYPE_MAPPING
+		u64 dynType1 = 0;
+		u64 dynType2 = 0;
+		u64 dynType3 = 0;
+		for(u64 i = 0; i < num_threads; i++){
+			dynType1 += thInfo[i].type1;
+			dynType2 += thInfo[i].type2;
+			dynType3 += thInfo[i].type3;
+		}
+		cout << "Dynamic type mapping: \n\tType1: " << dynType1 << "\t\tType2: " << dynType2 << "\t\tType3: " << dynType3 << endl;
+#endif
 #ifdef CALC_TYPE_SWITCH
 		u32 switchCnt = 0;
 		for(u64 i = 0; i < num_threads; i++){
@@ -889,10 +1313,10 @@ class GraphTango : public dataStruc {
 
 					#ifdef CALC_TYPE_SWITCH
 					VType initType = VType::VTYPE_3;
-					if(vArray[src].outEdges.capacity <= vArray[src].outEdges.TH0){
+					if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH0){
 						initType = VType::VTYPE_1;
 					}
-					else if(vArray[src].outEdges.capacity <= vArray[src].outEdges.TH1){
+					else if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH1){
 						initType = VType::VTYPE_2;
 					}
 					#endif
@@ -908,16 +1332,28 @@ class GraphTango : public dataStruc {
 
 					#ifdef CALC_TYPE_SWITCH
 					VType finType = VType::VTYPE_3;
-					if(vArray[src].outEdges.capacity <= vArray[src].outEdges.TH0){
+					if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH0){
 						finType = VType::VTYPE_1;
 					}
-					else if(vArray[src].outEdges.capacity <= vArray[src].outEdges.TH1){
+					else if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH1){
 						finType = VType::VTYPE_2;
 					}
 					if(initType != finType){
 						thInfo[actualTh].switchCnt++;
 					}
 					#endif
+
+					#ifdef CALC_DYNNAMIC_TYPE_MAPPING
+					if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH0){
+						thInfo[actualTh].type1++;
+					}
+					else if(vArray[src].outEdges.capacity <= EdgeArray<Neigh>::TH1){
+						thInfo[actualTh].type2++;
+					}
+					else{
+						thInfo[actualTh].type3++;
+					}
+					#endif
 				}
 
 				//targetTh = (dst / 64) & thMask;
@@ -929,10 +1365,10 @@ class GraphTango : public dataStruc {
 
 					#ifdef CALC_TYPE_SWITCH
 					VType initType = VType::VTYPE_3;
-					if(vArray[dst].inEdges.capacity <= vArray[dst].inEdges.TH0){
+					if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH0){
 						initType = VType::VTYPE_1;
 					}
-					else if(vArray[dst].inEdges.capacity <= vArray[dst].inEdges.TH1){
+					else if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH1){
 						initType = VType::VTYPE_2;
 					}
 					#endif
@@ -949,16 +1385,28 @@ class GraphTango : public dataStruc {
 
 					#ifdef CALC_TYPE_SWITCH
 					VType finType = VType::VTYPE_3;
-					if(vArray[dst].inEdges.capacity <= vArray[dst].inEdges.TH0){
+					if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH0){
 						finType = VType::VTYPE_1;
 					}
-					else if(vArray[dst].inEdges.capacity <= vArray[dst].inEdges.TH1){
+					else if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH1){
 						finType = VType::VTYPE_2;
 					}
 					if(initType != finType){
 						thInfo[actualTh].switchCnt++;
 					}
 					#endif
+
+					#ifdef CALC_DYNNAMIC_TYPE_MAPPING
+					if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH0){
+						thInfo[actualTh].type1++;
+					}
+					else if(vArray[dst].inEdges.capacity <= EdgeArray<Neigh>::TH1){
+						thInfo[actualTh].type2++;
+					}
+					else{
+						thInfo[actualTh].type3++;
+					}
+					#endif
 				}
 
 			}
@@ -1007,10 +1455,6 @@ class GraphTango : public dataStruc {
 			thInfo[i].edgeCnt = 0;
 			num_nodes += thInfo[i].nodeCnt;
 			thInfo[i].nodeCnt = 0;
-#ifdef CALC_TYPE_SWITCH
-			switchCnt += thInfo[i].switchCnt;
-			thInfo[i].switchCnt = 0;
-#endif
 		}
 		//num_nodes = el[batchSize - 1].lastAssignedId + 1;
 	}
@@ -1018,7 +1462,6 @@ class GraphTango : public dataStruc {
 #endif
 
 
-
 #ifdef USE_GT_BALANCED_TYPE3_ONLY
 
 	Vertex<Neigh>* vArray;
diff --git a/src/dynamic/GraphTangoHash.h b/src/dynamic/GraphTangoHash.h
deleted file mode 100644
index e43b07f..0000000
--- a/src/dynamic/GraphTangoHash.h
+++ /dev/null
@@ -1,392 +0,0 @@
-#pragma once
-
-#include <cstdint>
-#include <immintrin.h>
-#include <cassert>
-#include <cstring>
-#include <vector>
-#include <map>
-
-#include "common.h"
-#include "CustomAllocator.h"
-
-//typedef uint64_t u64;
-//typedef uint32_t u32;
-//typedef uint8_t  u8;
-
-#define USE_64_BIT_KEY
-
-#ifdef USE_64_BIT_KEY
-typedef u64 Key;
-#else
-typedef u32 Key;
-#endif
-
-#define CACHE_LINE_SIZE			64U
-#define ELEMS_IN_LINE			(CACHE_LINE_SIZE / sizeof(Key))
-
-#ifdef USE_64_BIT_KEY
-#define FLAG_EMPTY_SLOT			0xFFFFFFFFFFFFFFFFULL
-#define FLAG_TOMB_STONE			0xFFFFFFFFFFFFFFFEULL
-#define CONST_FACTOR_A			11400714818402812347ULL
-#define NUM_INITIAL_ELEMS		(5 + 8)
-#else
-#define FLAG_EMPTY_SLOT			0xFFFFFFFFU
-#define FLAG_TOMB_STONE			0xFFFFFFFEU
-#define CONST_FACTOR_A			2654435769U
-#endif
-
-
-
-using namespace std;
-
-#ifdef USE_CAHCE_FRIENDLY_HASH
-
-template <typename Neigh>
-class GraphTangoHash{
-public:
-
-#ifdef USE_64_BIT_KEY
-	static constexpr u8 rotation[8] = {0, 5, 2, 6, 1, 4, 7, 3};
-	constexpr u32 getShiftAmt(){
-		return 64 - __builtin_ctzl(capacity / ELEMS_IN_LINE);
-	}
-#else
-	//static constexpr u8 rotation[16] = {0, 1, 4, 9, 15, 2, 10, 6, 3, 8, 14, 5, 12, 7, 11, 13};
-	static constexpr u8 rotation[16] = {0, 9, 6, 15, 2, 11, 4, 13, 3, 7, 10, 14, 5, 1, 8, 12};
-	constexpr u32 getShiftAmt(){
-		return 32 - __builtin_ctzl(capacity / ELEMS_IN_LINE);
-	}
-#endif
-
-	u32 degree = 0;
-	u32 capacity = 0;
-	Neigh* __restrict neighArr = nullptr;
-	Neigh neigh[NUM_INITIAL_ELEMS];
-
-	//map<u32, u32>	probes;
-
-	GraphTangoHash(){
-		//capacity = NUM_INITIAL_ELEMS;
-		//neighArr = (Neigh*)globalAllocator.allocate(capacity * sizeof(Neigh));
-		//memset(neighArr, 0xff, capacity * sizeof(Neigh));
-	}
-
-	~GraphTangoHash(){
-		//free(neighArr);
-	}
-
-//	inline u32 hash1(u32 key){
-//		return (u32)(key * CONST_FACTOR_A) >> (32 - capacity / ELEMS_IN_LINE + 1);
-//	}
-//
-//	inline u32 hash2(u32 key){
-//		const u32 mask = capacity / ELEMS_IN_LINE - 1;
-//		return (key & mask) | 1;		//return an odd value
-//	}
-//
-//	u32 find(u32 key){
-//		const u32 numCacheLines = capacity / ELEMS_IN_LINE;
-//		const u32 h1 = hash1(key);
-//		for(u32 i = 0; i < numCacheLines; i++){
-//			const u32 offset = ((h1 + i * hash2(key)) & (numCacheLines - 1)) * ELEMS_IN_LINE;
-//			//cout << "\t" << offset << endl;
-//
-//			//check all of the brought cache line
-//			//#pragma GCC unroll 8
-//			for(int j = 0; j < ELEMS_IN_LINE; j++){
-//				if(neighArr[offset + j] == key){
-//					return offset + j;			//found
-//				}
-//				if(neighArr[offset + j] == FLAG_EMPTY_SLOT){
-//					return FLAG_EMPTY_SLOT;		//not found
-//				}
-//			}
-//		}
-//		assert(false);		//should never reach here
-//		return FLAG_EMPTY_SLOT;
-//	}
-
-
-	inline void insertDuringRehash(u32 key){
-		const Key cacheLineMask = capacity / ELEMS_IN_LINE - 1;
-		const Key h1 = (Key)(key * CONST_FACTOR_A) >> getShiftAmt();		//[0,1,...,#cache_lines]
-		for(u32 i = 0; i <= cacheLineMask; i++){
-			const Key h2 = (key & cacheLineMask) | 1;
-			const Key base = ((h1 + i * h2) & cacheLineMask) * ELEMS_IN_LINE;	//cyclic within [0,1,...,#cache_lines]
-
-			//check all elements of the cache line
-			//#pragma GCC unroll 8
-			for(int j = 0; j < ELEMS_IN_LINE; j++){
-#ifdef USE_64_BIT_KEY
-				const Key idx = base | ((key + rotation[j]) & 0x7);
-#else
-				const Key idx = base | ((key + rotation[j]) & 0xf);
-#endif
-				if(neighArr[idx].node == FLAG_EMPTY_SLOT){
-					neighArr[idx].node = key;	//successful insertion
-					return;
-				}
-			}
-		}
-	}
-
-	void rehash(){
-		Neigh* __restrict oldArr = neighArr;
-		const u32 oldCap = capacity;
-
-		capacity = capacity * 2;
-		neighArr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh));
-		memset(neighArr, 0xff, capacity * sizeof(Key));	//reset new array
-		for(u32 i = 0; i < oldCap; i++){
-			const Key key = oldArr[i].node;
-			if(key < FLAG_TOMB_STONE){
-				insertDuringRehash(key);
-			}
-		}
-
-		globalAllocator.freePow2(oldArr, oldCap);
-	}
-
-	void insert(Key key, u64& edgeCnt){
-		if(degree < HYBRID_HASH_PARTITION){
-			//linear search
-			for(u32 i = 0; i < degree; i++){
-				if(neighArr[i].node == key){
-					//found duplicate, nothing to do
-					return;
-				}
-			}
-			neighArr[degree].node = key;
-			degree++;
-			edgeCnt++;
-
-			if(__builtin_expect(degree == capacity, 0)){
-				// Two things can happen now.
-				// 1. If we reached the partition threshold, switch to hash table
-				// 2. Otherwise, just grow neighArr
-				Neigh* __restrict oldArr = neighArr;
-				const u32 oldCap = capacity;
-				if(__builtin_expect(degree == HYBRID_HASH_PARTITION, 0)){
-					//switch to hash table
-					capacity = capacity * 4;
-					neighArr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh));
-					memset(neighArr, 0xff, capacity * sizeof(Neigh));	//reset new array
-					for(u32 i = 0; i < degree; i++){
-						insertDuringRehash(oldArr[i].node);
-					}
-				}
-				else{
-					//grow neighArr
-					capacity = getNextPow2MinRet(capacity * 2);
-					neighArr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh));
-					memcpy(neighArr, oldArr, degree * sizeof(Neigh));
-				}
-				if(oldArr != neigh){
-					globalAllocator.freePow2(oldArr, oldCap);
-				}
-			}
-		}
-		else{
-			//hash based search
-			if(__builtin_expect(degree > (capacity / 2), 0)){
-				//Load factor is 0.5. Grow table
-				rehash();
-			}
-
-			//u32 probelen = 0;
-			const Key cacheLineMask = capacity / ELEMS_IN_LINE - 1;
-			const Key h1 = (Key)(key * CONST_FACTOR_A) >> getShiftAmt();		//[0,1,...,#cache_lines]
-			for(Key i = 0; i <= cacheLineMask; i++){
-				const Key h2 = (key & cacheLineMask) | 1;
-				const Key base = ((h1 + i * h2) & cacheLineMask) * ELEMS_IN_LINE;	//cyclic within [0,1,...,#cache_lines]
-
-				//check all elements of the cache line
-				//#pragma GCC unroll 8
-				for(int j = 0; j < ELEMS_IN_LINE; j++){
-	#ifdef USE_64_BIT_KEY
-					const Key idx = base | ((key + rotation[j]) & 0x7);
-	#else
-					const Key idx = base | ((key + rotation[j]) & 0xf);
-	#endif
-					//probelen++;
-					if(neighArr[idx].node >= FLAG_TOMB_STONE){
-						neighArr[idx].node = key;	//successful insertion
-						degree++;
-						edgeCnt++;
-						//probes[probelen]++;
-						return;
-					}
-					if(neighArr[idx].node == key){
-						return;					//found, no need to do anything
-					}
-				}
-			}
-		}
-	}
-
-	void erase(u32 key){
-
-	}
-
-};
-
-template<typename Neigh>
-constexpr u8 GraphTangoHash<Neigh>::rotation[8];
-
-#endif
-
-
-#ifdef USE_CAHCE_FRIENDLY_HASH_ONLY
-
-template <typename Neigh>
-class GraphTangoHash{
-public:
-
-#ifdef USE_64_BIT_KEY
-	static constexpr u8 rotation[8] = {0, 5, 2, 6, 1, 4, 7, 3};
-	constexpr u32 getShiftAmt(){
-		return 64 - __builtin_ctzl(capacity / ELEMS_IN_LINE);
-	}
-#else
-	//static constexpr u8 rotation[16] = {0, 1, 4, 9, 15, 2, 10, 6, 3, 8, 14, 5, 12, 7, 11, 13};
-	static constexpr u8 rotation[16] = {0, 9, 6, 15, 2, 11, 4, 13, 3, 7, 10, 14, 5, 1, 8, 12};
-	constexpr u32 getShiftAmt(){
-		return 32 - __builtin_ctzl(capacity / ELEMS_IN_LINE);
-	}
-#endif
-
-	u32 degree = 0;
-	u32 capacity = 0;
-	Neigh* __restrict neighArr = nullptr;
-	Neigh* __restrict adjList = nullptr;
-	Neigh neigh[NUM_INITIAL_ELEMS];
-
-	//map<u32, u32>	probes;
-
-	GraphTangoHash(){
-		//capacity = NUM_INITIAL_ELEMS;
-		//neighArr = (Neigh*)globalAllocator.allocate(capacity * sizeof(Neigh));
-		//memset(neighArr, 0xff, capacity * sizeof(Neigh));
-	}
-
-	~GraphTangoHash(){
-		//free(neighArr);
-	}
-
-	inline void insertDuringRehash(u32 key){
-		const Key cacheLineMask = capacity / ELEMS_IN_LINE - 1;
-		const Key h1 = (Key)(key * CONST_FACTOR_A) >> getShiftAmt();		//[0,1,...,#cache_lines]
-		for(u32 i = 0; i <= cacheLineMask; i++){
-			const Key h2 = (key & cacheLineMask) | 1;
-			const Key base = ((h1 + i * h2) & cacheLineMask) * ELEMS_IN_LINE;	//cyclic within [0,1,...,#cache_lines]
-
-			//check all elements of the cache line
-			//#pragma GCC unroll 8
-			for(int j = 0; j < ELEMS_IN_LINE; j++){
-#ifdef USE_64_BIT_KEY
-				const Key idx = base | ((key + rotation[j]) & 0x7);
-#else
-				const Key idx = base | ((key + rotation[j]) & 0xf);
-#endif
-				if(neighArr[idx].node == FLAG_EMPTY_SLOT){
-					neighArr[idx].node = key;	//successful insertion
-					return;
-				}
-			}
-		}
-	}
-
-	void rehash(){
-		Neigh* __restrict oldArr = neighArr;
-		const u32 oldCap = capacity;
-
-		capacity = capacity * 2;
-		neighArr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh));
-		memset(neighArr, 0xff, capacity * sizeof(Key));	//reset new array
-		for(u32 i = 0; i < oldCap; i++){
-			const Key key = oldArr[i].node;
-			if(key < FLAG_TOMB_STONE){
-				insertDuringRehash(key);
-			}
-		}
-
-		globalAllocator.freePow2(oldArr, oldCap);
-	}
-
-	void insert(Key key, u64& edgeCnt){
-		if(degree == NUM_INITIAL_ELEMS){
-			//switch to hash table, nut key is not yet inserted
-			capacity = getNextPow2MinRet(NUM_INITIAL_ELEMS * 4);
-			neighArr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh));
-			memset(neighArr, 0xff, capacity * sizeof(Neigh));	//reset new array
-			for(u32 i = 0; i < NUM_INITIAL_ELEMS; i++){
-				insertDuringRehash(neigh[i].node);
-			}
-		}
-
-		if(degree < NUM_INITIAL_ELEMS){
-			//linear search
-			for(u32 i = 0; i < degree; i++){
-				if(neigh[i].node == key){
-					//found duplicate, nothing to do
-					return;
-				}
-			}
-			neigh[degree].node = key;
-			degree++;
-			edgeCnt++;
-		}
-		else{
-			//hash based search
-			if(__builtin_expect(degree > (capacity / 2), 0)){
-				//Load factor is 0.5. Grow table
-				rehash();
-			}
-
-			//u32 probelen = 0;
-			const Key cacheLineMask = capacity / ELEMS_IN_LINE - 1;
-			const Key h1 = (Key)(key * CONST_FACTOR_A) >> getShiftAmt();		//[0,1,...,#cache_lines]
-			for(Key i = 0; i <= cacheLineMask; i++){
-				const Key h2 = (key & cacheLineMask) | 1;
-				const Key base = ((h1 + i * h2) & cacheLineMask) * ELEMS_IN_LINE;	//cyclic within [0,1,...,#cache_lines]
-
-				//check all elements of the cache line
-				//#pragma GCC unroll 8
-				for(int j = 0; j < ELEMS_IN_LINE; j++){
-	#ifdef USE_64_BIT_KEY
-					const Key idx = base | ((key + rotation[j]) & 0x7);
-	#else
-					const Key idx = base | ((key + rotation[j]) & 0xf);
-	#endif
-					//probelen++;
-					if(neighArr[idx].node >= FLAG_TOMB_STONE){
-						neighArr[idx].node = key;	//successful insertion
-						degree++;
-						edgeCnt++;
-						//probes[probelen]++;
-						adjList = nullptr;	//adjacency list no longer valid
-						return;
-					}
-					if(neighArr[idx].node == key){
-						return;					//found, no need to do anything
-					}
-				}
-			}
-		}
-	}
-
-	void erase(Key key, u64& edgeCnt){
-
-	}
-
-};
-
-template<typename Neigh>
-constexpr u8 GraphTangoHash<Neigh>::rotation[8];
-
-#endif
-
-
-
-
diff --git a/src/dynamic/LockFreePoolWithList.h b/src/dynamic/LockFreePoolWithList.h
index 6798e8d..c36abb4 100644
--- a/src/dynamic/LockFreePoolWithList.h
+++ b/src/dynamic/LockFreePoolWithList.h
@@ -11,7 +11,7 @@
 
 #define MAP_HUGE_2MB    (21 << MAP_HUGE_SHIFT)
 
-template <u64 MAX_THREADS = 16, u64 MAX_SEGMENT_BITS = 32, u64 BLOCK_SIZE = (1UL << 22)>
+template <u64 MAX_THREADS = 32, u64 MAX_SEGMENT_BITS = 32, u64 BLOCK_SIZE = (1UL << 22)>
 class LockFreePoolWithList {
 	alignas(64) void* __restrict nextFreePtrs[MAX_THREADS][MAX_SEGMENT_BITS];
 
@@ -63,6 +63,7 @@ class LockFreePoolWithList {
 	}
 
 	void* allocPow2(u64 size){
+		assert(isPowOf2(size));	// checked only when assert is enabled (no NDEBUG); size is converted with getPow2Log2 below, which presumably expects a power of two
 		return allocLog2(getPow2Log2(size));
 	}
 
@@ -81,6 +82,7 @@ class LockFreePoolWithList {
 	}
 
 	void freePow2(void* __restrict ptr, u64 size){
+		assert(isPowOf2(size));	// checked only when assert is enabled (no NDEBUG); size is converted with getPow2Log2 below, which presumably expects a power of two
 		freeLog2(ptr, getPow2Log2(size));
 	}
 
diff --git a/src/dynamic/Vertex.h b/src/dynamic/Vertex.h
index 0dd543b..9da03ef 100644
--- a/src/dynamic/Vertex.h
+++ b/src/dynamic/Vertex.h
@@ -23,10 +23,11 @@ typedef robin_hood::unordered_flat_map<u32, u32> graphite_hashmap;
 typedef tsl::robin_map<u32, u32> graphite_hashmap;
 
 #else
-typedef std::unordered_map<u32, u32, std::unordered_map<u32, u32>::hasher, std::unordered_map<u32, u32>::key_equal, custom_allocator< std::pair<const u32,u32>> > graphite_hashmap;
+typedef std::unordered_map<u32, u32, std::unordered_map<u32, u32>::hasher,
+		std::unordered_map<u32, u32>::key_equal,
+		custom_allocator<std::pair<const u32, u32>> > graphite_hashmap;	// fallback map type: std::unordered_map with its default hasher/key_equal but the project's custom_allocator
 #endif
 
-
 #ifdef USE_HYBRID_HASHMAP_WITH_GROUPING
 
 template <typename Neigh>
@@ -49,7 +50,6 @@ class Vertex{
 
 #endif
 
-
 #ifdef USE_HYBRID_HASHMAP_WITH_GROUPING_TIGHTER
 
 #define INITIAL_EDGES				5
@@ -75,60 +75,71 @@ class Vertex{
 
 #endif
 
-
-#if defined(USE_GT_BALANCED) || defined(USE_GT_BALANCED_DYN_PARTITION)
+#if defined(USE_GT_BALANCED) || defined(USE_GT_BALANCED_DYN_PARTITION) || defined(USE_GT_LOAD_BALANCED)
 
 #define 	FLAG_EMPTY_SLOT			0xFFFFFFFFU
 #define 	FLAG_TOMB_STONE			0xFFFFFFFEU
 #define		CACHE_LINE_SIZE			64
 
-template <typename Neigh>
-class alignas(CACHE_LINE_SIZE) EdgeArray{
+template<typename Neigh>
+class alignas(CACHE_LINE_SIZE) EdgeArray {
 
 private:
-	void rebuildHashTable(u64 oldCap, u64 newCap){
-		if(oldCap > TH1){
-			//free old map
-			globalAllocator.freePow2(etype.type2_3.mapArr, oldCap * 2 * sizeof(DstLocPair));
-			etype.type2_3.mapArr = nullptr;
-		}
-
-		if(newCap > TH1){
+	void rebuildHashTable(u64 oldCap, u64 newCap) {
+		//rebuild only when newCap is a power of 2 and the map does not already have newCap * 2 slots (oldCap is not used)
+		if (!(newCap & (newCap - 1))
+				&& (etype.type3.mapCapacity != (newCap * 2))) {
+			if (etype.type3.mapCapacity) {
+				globalAllocator.freePow2(etype.type3.mapArr,
+						etype.type3.mapCapacity * sizeof(DstLocPair));
+			}
 			//allocate new map
-			etype.type2_3.mapArr = (DstLocPair*)globalAllocator.allocate(newCap * 2 * sizeof(DstLocPair));
+			etype.type3.mapCapacity = newCap * 2;
+			etype.type3.mapArr = (DstLocPair*) globalAllocator.allocate(
+					etype.type3.mapCapacity * sizeof(DstLocPair));
+			DstLocPair *__restrict locMap = etype.type3.mapArr;
+			memset(locMap, -1, etype.type3.mapCapacity * sizeof(DstLocPair));
 
-			DstLocPair* __restrict locMap = etype.type2_3.mapArr;
-			memset(locMap, -1, newCap * 2 * sizeof(DstLocPair));
+			const u32 mask = etype.type3.mapCapacity - 1;
 
-			const u32 mask = newCap * 2 - 1;
+			assert(degree < etype.type3.mapCapacity);
 
 			//add existing nodes to hash
-			const Neigh* __restrict nn = etype.type2_3.neighArr;
-			for(u64 i = 0; i < degree; i++){
-				const u32 dst = nn[i].node;
-				u32 idx = dst & mask;
-				while(true){
-					if(locMap[idx].dst == FLAG_EMPTY_SLOT){
-						//found insertion point
-						locMap[idx].dst = dst;
-						locMap[idx].loc = i;
-						break;
-					}
-					//move on
-					idx++;
-					if(idx == (newCap * 2)){
-						idx = 0;
+			u64 loc = 0;
+			u32 blockNum = degree / BLOCK_SIZE;
+			assert(degree % BLOCK_SIZE == 0);
+			for (u32 j = 0; j < blockNum; j++) {
+				const Neigh *nn = etype.type3.blockList[j];
+				for (u64 i = 0; i < BLOCK_SIZE; i++) {
+					const u32 dst = nn[i].node;
+					assert((i32 )dst >= 0);
+					u32 idx = dst & mask;
+					while (true) {
+						if (locMap[idx].dst == FLAG_EMPTY_SLOT) {
+							//found insertion point
+							locMap[idx].dst = dst;
+							locMap[idx].loc = loc;
+							break;
+						}
+						//move on
+						idx++;
+						if (idx == (etype.type3.mapCapacity)) {
+							idx = 0;
+						}
 					}
+					loc++;
 				}
 			}
 		}
-	}
 
+	}
 
 public:
 
-	const static u64 TH0 = ((CACHE_LINE_SIZE - sizeof(u32) - sizeof(u32)) / sizeof(Neigh));
+	const static u64 TH0 = ((CACHE_LINE_SIZE - sizeof(u32) - sizeof(u32))
+			/ sizeof(Neigh));
 	const static u64 TH1 = HYBRID_HASH_PARTITION;
+	const static u64 BLOCK_SIZE = TH1; // when type3 fills out the existing block(s), we allocate a new block of size BLOCK_SIZE.
 
 	u32 degree = 0;
 	u32 capacity = TH0;
@@ -139,209 +150,261 @@ class alignas(CACHE_LINE_SIZE) EdgeArray{
 		} type1;
 
 		struct {
-			Neigh* 			__restrict neighArr = nullptr;
-			DstLocPair* 	__restrict mapArr = nullptr;
-		} type2_3;
+			Neigh *neighArr = nullptr;
+		} type2;
+
+		struct {
+			DstLocPair *__restrict mapArr = nullptr;
+			vector<Neigh*> blockList;
+			u64 mapCapacity;
+		} type3;
+
 	} etype;
 
 	u8 __pad[CACHE_LINE_SIZE - sizeof(u32) - sizeof(u32) - sizeof(etype)];
 
-	void insertEdge(const Idx dstId, const Weight weight, u64& edgeCnt){
+	void insertEdge(const Idx dstId, const Weight weight, u64 &edgeCnt) {
+		assert(dstId >= 0);
 		//First, check if needs expanding
-		if(__builtin_expect(degree == capacity, false)){
-			capacity = getNextPow2(capacity * 2);
-			Neigh* __restrict newPtr = (Neigh*)globalAllocator.allocPow2(capacity * sizeof(Neigh));
+		if (__builtin_expect(degree == capacity, false)) {
 
-			if(degree <= TH0){	//Going from Type 1 to Type 2
+			if (degree == TH0) {	//Type 1 => Type 2
+				capacity = getNextPow2(capacity * 2);
+				Neigh *__restrict newPtr = (Neigh*) globalAllocator.allocPow2(
+						capacity * sizeof(Neigh));
 				memcpy(newPtr, etype.type1.neigh, degree * sizeof(Neigh));
-				etype.type2_3.mapArr = nullptr;
-			}
-			else{				//Type 2 or 3
-				memcpy(newPtr, etype.type2_3.neighArr, degree * sizeof(Neigh));
-				globalAllocator.freePow2(etype.type2_3.neighArr, capacity / 2 * sizeof(Neigh));
-			}
-			etype.type2_3.neighArr = newPtr;
-
-			//Grow hash table if needed
-			rebuildHashTable(capacity / 2, capacity);
-		}
-
-		Neigh* __restrict currNeighArr;
-
-		if(capacity <= TH0){
+				etype.type2.neighArr = newPtr;
+			} else if ((degree * 2) <= TH1) { // Type 2 => Type 2
+				capacity = capacity * 2;
+				Neigh *__restrict newPtr = (Neigh*) globalAllocator.allocPow2(
+						capacity * sizeof(Neigh));
+				memcpy(newPtr, etype.type2.neighArr, degree * sizeof(Neigh));
+				globalAllocator.freePow2(etype.type2.neighArr,
+						degree * sizeof(Neigh));
+				etype.type2.neighArr = newPtr;
+			} else if (degree == TH1) { // Type 2 => Type 3
+				capacity = capacity + BLOCK_SIZE;
+				new (&etype.type3.blockList) vector<Neigh*>();
+				etype.type3.blockList.push_back(etype.type2.neighArr);
+				assert(etype.type2.neighArr[0].node >= 0);
+				Neigh *newPtr = (Neigh*) globalAllocator.allocPow2(
+						BLOCK_SIZE * sizeof(Neigh));
+				etype.type3.blockList.push_back(newPtr);
+				etype.type3.mapCapacity = 0;
+				//Grow hash table if needed
+				rebuildHashTable(degree, capacity);
+			} else { // Type 3 => Type 3
+				capacity = capacity + BLOCK_SIZE;
+				Neigh *newPtr = (Neigh*) globalAllocator.allocPow2(
+						BLOCK_SIZE * sizeof(Neigh));
+				etype.type3.blockList.push_back(newPtr);
+				//Grow hash table if needed
+				rebuildHashTable(degree, capacity);
+			}
+		}
+
+		Neigh *__restrict currNeighArr;
+
+		if (capacity <= TH0) {
 			currNeighArr = etype.type1.neigh;
-		}
-		else{
-			currNeighArr = etype.type2_3.neighArr;
+		} else if (capacity <= TH1) {
+			currNeighArr = etype.type2.neighArr;
 		}
 
 		//search and insert if not found
-		if(capacity <= TH1){
+		if (capacity <= TH1) {
 			//Type 1 or 2, do linear search
-			for(u64 i = 0; i < degree; i++){
-				if(currNeighArr[i].node == dstId){
+			for (u64 i = 0; i < degree; i++) {
+				if (currNeighArr[i].node == dstId) {
 					//found same edge, just update
 					currNeighArr[i].setWeight(weight);
+					assert(false);
 					return;
 				}
 			}
-		}
-		else{
+			//not found, insert
+			currNeighArr[degree].node = dstId;
+			currNeighArr[degree].setWeight(weight);
+		} else {
 			//type 3, use hash table + adj list
-			u32 idx = dstId & (capacity * 2 - 1);
-			DstLocPair* __restrict locMap = etype.type2_3.mapArr;
-			DstLocPair* __restrict insLoc = nullptr;
+			u32 idx = dstId & (etype.type3.mapCapacity - 1);
+			DstLocPair *__restrict locMap = etype.type3.mapArr;
+			DstLocPair *__restrict insLoc = nullptr;
 			//probe = 0;
-			while(true){
+			while (true) {
 				//probe++;
-				if(locMap[idx].dst == FLAG_EMPTY_SLOT){
+				if (locMap[idx].dst == FLAG_EMPTY_SLOT) {
 					//edge not found, insert
-					if(insLoc){
-						locMap = insLoc;	//points to the first tomb stone found
+					if (insLoc) {
+						idx = static_cast<u32>(insLoc - locMap); //reuse the first tomb stone: the store below must land on that slot, not on insLoc[idx]
 					}
 					locMap[idx].dst = dstId;
 					locMap[idx].loc = degree;
 					break;
-				}
-				else if((locMap[idx].dst == FLAG_TOMB_STONE) && (insLoc == nullptr)){
+				} else if ((locMap[idx].dst == FLAG_TOMB_STONE)
+						&& (insLoc == nullptr)) {
 					insLoc = locMap + idx;
-				}
-				else if(locMap[idx].dst == dstId){
+				} else if (locMap[idx].dst == dstId) {
 					//edge found, update weight
-					currNeighArr[locMap[idx].loc].setWeight(weight);
+					u32 blockId = locMap[idx].loc / BLOCK_SIZE;
+					u32 blockOffset = locMap[idx].loc % BLOCK_SIZE;
+					assert(
+							etype.type3.blockList[blockId][blockOffset].node
+									== dstId);
+					etype.type3.blockList[blockId][blockOffset].setWeight(
+							weight);
 					//probingDist[probe]++;
+					assert(false);
 					return;
 				}
 				//move on
 				idx++;
-				if(idx == (capacity * 2)){
+				if (idx == (etype.type3.mapCapacity)) {
 					idx = 0;
 				}
 			}
+			//not found, insert
+			u32 blockId = degree / BLOCK_SIZE;
+			u32 blockOffset = degree % BLOCK_SIZE;
+			etype.type3.blockList[blockId][blockOffset].node = dstId;
+			etype.type3.blockList[blockId][blockOffset].setWeight(weight);
+			assert(etype.type3.blockList[blockId][blockOffset].node >= 0);
+			assert(
+					etype.type3.blockList[etype.type3.blockList.size() - 1][0].node
+							>= 0);
 		}
-		//not found, insert
-		currNeighArr[degree].node = dstId;
-		currNeighArr[degree].setWeight(weight);
 		degree++;
 		edgeCnt++;
 	}
 
-
-	void deleteEdge(const Idx dstId, u64& edgeCnt){
-		Neigh* __restrict currNeighArr;
-		Neigh* __restrict nn = nullptr;
-
-		if(capacity <= TH0){
-			currNeighArr = etype.type1.neigh;
-		}
-		else{
-			currNeighArr = etype.type2_3.neighArr;
-		}
-
+	void deleteEdge(const Idx dstId, u64 &edgeCnt) {
+		assert(dstId >= 0);
 		//search
-		if(capacity <= TH1){
-			//Type 1 or 2, do linear search
-			for(u64 i = 0; i < degree; i++){
-				if(currNeighArr[i].node == dstId){
-					nn = currNeighArr + i;
-					break;
+		if (capacity <= TH0) {	//Type 1
+			Neigh *__restrict currNeighArr = etype.type1.neigh;
+			for (u64 i = 0; i < degree; i++) {
+				if (currNeighArr[i].node == dstId) {
+					//edge found, delete
+					degree--;
+					edgeCnt--;
+					currNeighArr[i] = currNeighArr[degree];
+					return;
 				}
 			}
-			if(__builtin_expect(nn != nullptr, true)){
-				//edge found, delete
-				degree--;
-				edgeCnt--;
-				nn->node = currNeighArr[degree].node;
-				nn->setWeight(currNeighArr[degree].getWeight());
-			}
-			else{
-				//edge not found, nothing to do
-				return;
-			}
 		}
-		else{
+		else if (capacity <= TH1) {	//Type2; 'else' stops a Type 1 miss from reading the inline neighbours as a pointer
+			Neigh *__restrict currNeighArr = etype.type2.neighArr;
+			for (u64 i = 0; i < degree; i++) {
+				if (currNeighArr[i].node == dstId) {
+					//edge found, delete
+					degree--;
+					edgeCnt--;
+					currNeighArr[i] = currNeighArr[degree];
+
+					if (degree * 4 <= capacity) {
+						//reduce capacity
+						u64 newCap = capacity / 2;
+						if (newCap <= TH0) {
+							//T2 => T1
+							memcpy(etype.type1.neigh, currNeighArr,
+									degree * sizeof(Neigh));
+							globalAllocator.freePow2(currNeighArr,
+									capacity * sizeof(Neigh));
+							capacity = TH0;
+						} else {
+							//T2 => T2
+							etype.type2.neighArr =
+									(Neigh*) globalAllocator.allocPow2(
+											newCap * sizeof(Neigh));
+							memcpy(etype.type2.neighArr, currNeighArr,
+									degree * sizeof(Neigh));
+							globalAllocator.freePow2(currNeighArr,
+									capacity * sizeof(Neigh));
+							capacity = newCap;
+						}
+					}
+					return;
+				}
+			}
+		} else {	//Type 3
 			//using hashed mode
-			u32 idx = dstId & (capacity * 2 - 1);
-			DstLocPair* __restrict locMap = etype.type2_3.mapArr;
-			while(true){
-				if(locMap[idx].dst == dstId){
+			u32 idx = dstId & (etype.type3.mapCapacity - 1);
+			DstLocPair *__restrict locMap = etype.type3.mapArr;
+			while (true) {
+				if (locMap[idx].dst == dstId) {
 					//edge found, delete
 					degree--;
 					edgeCnt--;
 					//delSucc++;
-					locMap[idx].dst = FLAG_TOMB_STONE; 				//invalidate previous hash-table entry
+					locMap[idx].dst = FLAG_TOMB_STONE; //invalidate previous hash-table entry
 
 					const u32 loc = locMap[idx].loc;
-					if(__builtin_expect(loc != degree, true)){		//nothing to do if last entry is removed
-						const u32 node = currNeighArr[degree].node;
+					if (__builtin_expect(loc != degree, true)) { //nothing to do if last entry is removed
+						u32 currBlockId = loc / BLOCK_SIZE;
+						u32 currBlockOffset = loc % BLOCK_SIZE;
+						u32 lastBlockId = degree / BLOCK_SIZE;
+						u32 lastBlockOffset = degree % BLOCK_SIZE;
+
+						const u32 node =
+								etype.type3.blockList[lastBlockId][lastBlockOffset].node;
 						//copy last entry
-						currNeighArr[loc] = currNeighArr[degree];
+						etype.type3.blockList[currBlockId][currBlockOffset] =
+								etype.type3.blockList[lastBlockId][lastBlockOffset];
 
 						//point to correct location of the swapped entry
-						u32 idxMoved = node & (capacity * 2 - 1);
-						while(locMap[idxMoved].dst != node){
+						u32 idxMoved = node & (etype.type3.mapCapacity - 1);
+						while (locMap[idxMoved].dst != node) {
 							idxMoved++;
-							if(idxMoved == (capacity * 2)){
+							if (idxMoved == (etype.type3.mapCapacity)) {
 								idxMoved = 0;
 							}
 						}
 						locMap[idxMoved].loc = loc;
 					}
-					break;
-				}
-				else if (locMap[idx].dst == FLAG_EMPTY_SLOT) {
+
+					//free block if needed
+					if (degree % BLOCK_SIZE == 0) {
+						assert(degree == capacity - BLOCK_SIZE);
+						globalAllocator.freePow2(etype.type3.blockList.back(),
+								BLOCK_SIZE * sizeof(Neigh));
+						etype.type3.blockList.pop_back();
+						capacity = capacity - BLOCK_SIZE;
+
+						if (capacity <= TH1) {	//type switch T3 => T2 (otherwise rehash if necessary)
+							globalAllocator.freePow2(etype.type3.mapArr, etype.type3.mapCapacity * sizeof(DstLocPair));	//map slots are DstLocPair, not Neigh
+							Neigh *firstBlock = etype.type3.blockList[0];
+							etype.type3.blockList.~vector();	//created with placement new in insertEdge: release its buffer before the storage is reused
+							etype.type2.neighArr = firstBlock;
+						} else { 	//T3 => T3
+							//rehash if needed
+							rebuildHashTable(capacity + BLOCK_SIZE, capacity);
+						}
+					}
+
+					return;
+				} else if (locMap[idx].dst == FLAG_EMPTY_SLOT) {
 					//edge not found, return
 					return;
 				}
 				//move on
 				idx++;
-				if(idx == (capacity * 2)){
+				if (idx == (etype.type3.mapCapacity)) {
 					idx = 0;
 				}
 			}
 		}
-
-		if((capacity > TH0) && ((degree * 4) <= capacity)){
-			//time to reduce capacity
-			const u64 oldCap = capacity;
-			const u64 newCap = capacity / 2;
-			capacity = newCap;
-
-			Neigh* __restrict oldPtr = etype.type2_3.neighArr;
-			Neigh* __restrict newPtr;
-
-			if(newCap <= TH0){
-				//moving from type 2 or 3 to type 1
-				newPtr = etype.type1.neigh;
-				capacity = TH0;
-			}
-			else{
-				etype.type2_3.neighArr = (Neigh*)globalAllocator.allocPow2(newCap * sizeof(Neigh));
-				newPtr = etype.type2_3.neighArr;
-			}
-
-			//copy old adjList and free
-			memcpy(newPtr, oldPtr, degree * sizeof(Neigh));
-			globalAllocator.freePow2(oldPtr, oldCap * sizeof(Neigh));
-
-			//shrink or delete hash table if needed
-			rebuildHashTable(oldCap, newCap);
-		}
 	}
 };
 
-
-template <typename Neigh>
-class Vertex{
+template<typename Neigh>
+class Vertex {
 public:
-	 EdgeArray<Neigh>		inEdges;
-	 EdgeArray<Neigh>		outEdges;
+	EdgeArray<Neigh> inEdges;
+	EdgeArray<Neigh> outEdges;
 };
 
-
 #endif
 
-
 #if defined(USE_GT_BALANCED_TYPE3_ONLY)
 
 #define 	FLAG_EMPTY_SLOT			0xFFFFFFFFU
@@ -530,7 +593,6 @@ class Vertex{
 
 #endif
 
-
 #ifdef USE_GT_BALANCED_STDMAP
 
 #define		CACHE_LINE_SIZE			64
@@ -721,8 +783,6 @@ class Vertex{
 
 #endif
 
-
-
 #if defined(USE_GT_BALANCED_MALLOC_STDMAP) || defined(USE_GT_BALANCED_RHH)
 
 #define		CACHE_LINE_SIZE			64
@@ -913,8 +973,6 @@ class Vertex{
 
 #endif
 
-
-
 #if defined(USE_GT_BALANCED_TSL_RHH)
 
 #define		CACHE_LINE_SIZE			64
@@ -1106,9 +1164,6 @@ class Vertex{
 
 #endif
 
-
-
-
 #ifdef USE_GT_BALANCED_ABSEIL
 
 #define		CACHE_LINE_SIZE			64
@@ -1299,8 +1354,6 @@ class Vertex{
 
 #endif
 
-
-
 #ifdef USE_GT_BALANCED_MALLOC
 
 #define 	FLAG_EMPTY_SLOT			0xFFFFFFFFU
@@ -1566,8 +1619,6 @@ class Vertex{
 
 #endif
 
-
-
 #ifdef USE_GT_UPDATE
 
 #define 	FLAG_EMPTY_SLOT			0xFFFFFFFFU
@@ -1832,9 +1883,8 @@ class alignas(CACHE_LINE_SIZE) EdgeArray{
 				degree--;
 				edgeCnt--;
 				nn->node = currNeighArr[degree].node;
 				nn->setWeight(currNeighArr[degree].getWeight());
-			}
-			else{
+			} else {
 				//edge not found, nothing to do
 				return;
 			}
@@ -1921,7 +1970,6 @@ class Vertex{
 
 #endif
 
-
 #ifdef USE_HYBRID_HASHMAP_WITH_GROUPING_AND_EDGE_ARR_LOCKING
 
 template <typename Neigh>
@@ -1944,7 +1992,6 @@ class Vertex{
 
 #endif
 
-
 #ifdef USE_SORTED_EDGES
 
 template <typename Neigh>
@@ -1966,7 +2013,6 @@ class Vertex{
 
 #endif
 
-
 #ifdef USE_CAHCE_FRIENDLY_HASH
 
 #include "GraphTangoHash.h"
@@ -1980,7 +2026,6 @@ class Vertex{
 
 #endif
 
-
 #ifdef USE_CAHCE_FRIENDLY_HASH_ONLY
 
 #include "GraphTangoHash.h"
@@ -1993,4 +2038,3 @@ class Vertex{
 };
 
 #endif
-
diff --git a/src/dynamic/abstract_data_struc.h b/src/dynamic/abstract_data_struc.h
index 475005e..693f55c 100644
--- a/src/dynamic/abstract_data_struc.h
+++ b/src/dynamic/abstract_data_struc.h
@@ -16,10 +16,6 @@ class dataStruc {
     int64_t num_nodes = 0;
     int64_t num_edges = 0;
 
-#ifdef CALC_TYPE_SWITCH
-	uint64_t switchCnt = 0;
-#endif
-
     bool weighted;
     bool directed;
     std::vector<float> property;
diff --git a/src/dynamic/common.h b/src/dynamic/common.h
index 4b77c8c..436eabe 100644
--- a/src/dynamic/common.h
+++ b/src/dynamic/common.h
@@ -21,6 +21,9 @@ typedef		int8_t		i8;
 typedef		I64			Idx;
 
 #define 	MIN_RET_VAL  		2
+#ifndef _OPENMP
+#define _OPENMP	// NOTE(review): forces every '#ifdef _OPENMP' path on even when the compiler did not define it (presumably for builds/indexers without OpenMP enabled) -- confirm this is intended
+#endif
 
 //#define LIKWID_PERFMON
 
@@ -38,6 +41,8 @@ typedef		I64			Idx;
 #define LIKWID_MARKER_GET(regionTag, nevents, events, time, count)
 #endif
 
+//#define ENABLE_PROFILING
+
 //#define		CALC_TYPE_SWITCH
 //#define		USE_CFH_FOR_DAH
 //#define		CALC_MEM_PER_EDGE
@@ -45,10 +50,19 @@ typedef		I64			Idx;
 //#define 	CALC_EDGE_TOUCHED
 //#define 	USE_HUGEPAGE
 
+#ifdef ENABLE_PROFILING
+#define		CALC_TYPE_SWITCH
+#define		CALC_MEM_PER_EDGE
+//#define 	CALC_EDGE_TOUCHED	/* do it later as it requires running the algo */
+#define		CALC_STATIC_TYPE_MAPPING
+#define		CALC_DYNNAMIC_TYPE_MAPPING
+#endif
+
 //define only one of the following
 //#define 	USE_HYBRID_HASHMAP
 //#define 	USE_HYBRID_HASHMAP_WITH_CFH
-#define 	USE_GT_BALANCED
+#define 	USE_GT_LOAD_BALANCED
+//#define 	USE_GT_BALANCED
 //#define 	USE_GT_BALANCED_TYPE3_ONLY
 //#define 	USE_GT_BALANCED_MALLOC
 //#define 	USE_GT_BALANCED_STDMAP
@@ -82,14 +96,19 @@ typedef		I64			Idx;
 			|| defined(USE_GT_BALANCED_DYN_PARTITION)									\
 			|| defined(USE_GT_BALANCED_ABSEIL)											\
 			|| defined(USE_GT_BALANCED_RHH)												\
-			|| defined(USE_GT_BALANCED_TSL_RHH)
-#define		HYBRID_HASH_PARTITION		32UL
+			|| defined(USE_GT_BALANCED_TSL_RHH)											\
+			|| defined(USE_GT_LOAD_BALANCED)
+#define		HYBRID_HASH_PARTITION		64UL
 #endif
 
 #ifdef		USE_SORTED_EDGES
 #define		LINEAR_BUFF_SIZE			512UL
 #endif
 
+#ifdef		USE_GT_LOAD_BALANCED
+#define		LB_NUMBER_OF_BUCKETS		128UL
+#endif
+
 typedef struct {
 	u32 dst;
 	u32 loc;
@@ -101,6 +120,10 @@ typedef enum {
 	VTYPE_3
 } VType;
 
+constexpr bool isPowOf2(u64 num){	// true only when exactly one bit of num is set
+	return num != 0 && !((num - 1) & num);	// 0 has no set bit, so it is rejected explicitly
+}
+
 // Log2 for power of 2 integers
 //#define 	LOG2(x) 	__builtin_ctzl(x)
 constexpr U64 getPow2Log2(U64 val) {
diff --git a/src/dynamic/fileReader.h b/src/dynamic/fileReader.h
index 5b5c4cf..d2ba591 100644
--- a/src/dynamic/fileReader.h
+++ b/src/dynamic/fileReader.h
@@ -26,11 +26,12 @@ Edge convertCSVLineIntoEdge(const char delim, const string& line, bool weighted)
     
     getline(ss, data, delim); e.source = stol(data);
     getline(ss, data, delim); e.destination = stol(data);
-    getline(ss, data, delim); /*time = stol(data);*/
+    //getline(ss, data, delim); /*time = stol(data);*/
 
     if(weighted){
-        getline(ss, data, delim);
-        e.weight = stol(data);
+        //getline(ss, data, delim);
+        //e.weight = stol(data);
+        e.weight = (rand() % 8) + 8;
     }
 
     if(line[0] == '-'){
@@ -75,7 +76,7 @@ void readBatchFromCSV(EdgeList& el, ifstream& in, int batchSize, int batch_id, b
 
     while(getline(in, line)){
         if(line != ""){          
-            Edge e = convertCSVLineIntoEdge(',', line, weighted);
+            Edge e = convertCSVLineIntoEdge(' ', line, weighted);
             if(assignLogicalID(e.source, VMap, lastAssignedLogicalID)) e.sourceExists = true;
             if(assignLogicalID(e.destination, VMap, lastAssignedLogicalID)) e.destExists = true;
             //e.batch_id = batch_id;
diff --git a/src/dynamic/frontEnd.cc b/src/dynamic/frontEnd.cc
index aefffb5..e4fb14d 100644
--- a/src/dynamic/frontEnd.cc
+++ b/src/dynamic/frontEnd.cc
@@ -12,7 +12,6 @@
 #include "parser.h"
 #include "../common/timer.h"
 
-
 using namespace std;
 /* Main thread that launches everything else */
 
@@ -33,70 +32,68 @@ int main(int argc, char *argv[]) {
 	el.reserve(opts.batch_size);
 
 	Timer t;
-	dataStruc *ds = createDataStruc(opts.type, opts.weighted, opts.directed, opts.num_nodes, opts.num_threads);
+	dataStruc *ds = createDataStruc(opts.type, opts.weighted, opts.directed,
+			opts.num_nodes, opts.num_threads);
 	Algorithm alg(opts.algorithm, ds, opts.type);
 
 	ofstream updF("Update.csv");
 
 	while (!file.eof()) {
-		readBatchFromCSV(el, file, opts.batch_size, batch_id, opts.weighted, VMAP, lastAssignedNodeID);
+		readBatchFromCSV(el, file, opts.batch_size, batch_id, opts.weighted,
+				VMAP, lastAssignedNodeID);
 
 		t.Start();
 		ds->update(el);
 		t.Stop();
 
 		updF << t.Seconds() << endl;
-		cout << "Inserted Batch " << batch_id << ": Nodes " << ds->num_nodes << ", Edges " << ds->num_edges << endl;
-
-		alg.performAlg();
+		cout << "Inserted Batch " << batch_id << ": Nodes " << ds->num_nodes
+				<< ", Edges " << ds->num_edges << endl;
 
 		batch_id++;
 	}
-	updF.close();
-
-
-//	while (!file.eof()) {
-//		readBatchFromCSV(el, file, opts.batch_size, batch_id, opts.weighted, VMAP, lastAssignedNodeID);
-//		ds->update(el);
-//		cout << "Inserted Batch " << batch_id << ": Nodes " << ds->num_nodes << ", Edges " << ds->num_edges << endl;
-//		//cout << "ins," << ((ds->num_edges * 1.0) / ds->num_nodes) << endl;
-//		batch_id++;
-//	}
-//
-//	file.close();
-
-//	stringstream ss;
-//	ss << opts.filename << ".del";
-//	file.open(ss.str());
-//	if (!file.is_open()) {
-//		cout << "Couldn't open file " << ss.str() << endl;
-//		exit(-1);
-//	}
-//
-//	batch_id = 0;
-//	while (!file.eof()) {
-//		readBatchFromCSV(el, file, opts.batch_size, batch_id, opts.weighted, VMAP, lastAssignedNodeID);
-//
-//		t.Start();
-//		ds->update(el);
-//		t.Stop();
-//
-//		updF << t.Seconds() << endl;
-//		//cout << "del," << ((ds->num_edges * 1.0) / ds->num_nodes) << endl;
-//		cout << "Deleted Batch " << batch_id << ": Nodes " << ds->num_nodes << ", Edges " << ds->num_edges << endl;
-//
-//		alg.performAlg();
-//
-//		batch_id++;
-//	}
-//	updF.close();
+	file.close();
+
+#ifndef ENABLE_PROFILING
+	alg.performAlg();
+#endif
 
+	if (opts.enDeleteEdges) {
+		ofstream updD("delUpdate.csv");
+		stringstream ss;
+		ss << opts.filename << ".del";
+		file.open(ss.str());
+		if (!file.is_open()) {	// the delete pass reads "<filename>.del"
+			cout << "Couldn't open file for delete " << ss.str() << endl;
+			exit(-1);
+		}
+
+		batch_id = 0;
+		el.clear();
+		while (!file.eof()) {
+			readBatchFromCSV(el, file, opts.batch_size, batch_id, opts.weighted,
+					VMAP, lastAssignedNodeID);
+
+			t.Start();
+			ds->update(el);
+			t.Stop();
+
+			updD << t.Seconds() << endl;
+			cout << "Deleted Batch " << batch_id << ": Nodes " << ds->num_nodes
+					<< ", Edges " << ds->num_edges << endl;
+
+			batch_id++;
+		}
+		file.close();
+		updD.close();
+	}
 	ds->print();
+	if (ds) {
+		delete ds;
+	}
+
 #ifdef CALC_EDGE_TOUCHED
 	cout << "EDGES TOUCHED: " << g_edge_touched << endl;
 #endif
-#ifdef CALC_TYPE_SWITCH
-	cout << "Switch count: " << ds->switchCnt << endl;
-#endif
 }
 
diff --git a/src/dynamic/parser.cc b/src/dynamic/parser.cc
index ced3352..e01d7d2 100644
--- a/src/dynamic/parser.cc
+++ b/src/dynamic/parser.cc
@@ -50,6 +50,7 @@ void printUsage()
 		  << "-n max number of nodes  to initialize with\n"
 	      << "-a algorithm      algorithm to run (default: bfsdyn)\n"
 	      << "-t number of threads      (default: 16)\n"
+		  << "-r enable edge deletion	0=disable	1=enable\n"
 	      << "  DATA STRUCTURE OPTIONS:\n"
 		  << "               1) adList (single-threaded) \n"		  
 	      << "               2) adListShared (multihtreaded shared style) \n"
@@ -78,7 +79,7 @@ cmd_args parse(int argc, char *argv[])
 {
     cmd_args args;
     int opt = 0;
-    while(-1 != (opt = getopt(argc, argv, "f:b:w:d:s:n:a:t:h"))) {
+    while(-1 != (opt = getopt(argc, argv, "f:b:w:d:s:n:a:t:hr:"))) { // -h takes no argument; -r takes one (0 or 1)
         switch(opt) {
 	case 'f':               
 //	    if (getSuffix(optarg) != ".csv") {
@@ -130,6 +131,17 @@ cmd_args parse(int argc, char *argv[])
 	case 't':
 	    args.num_threads = atoi(optarg);    
 	    break;
+	case 'r':
+		if(atoi(optarg) == 1) {
+			args.enDeleteEdges = true;
+		} else if (atoi(optarg) == 0) {
+			args.enDeleteEdges = false;
+		} else {
+			std::cerr << "-r only takes 0 or 1" << std::endl;
+			printUsage();
+			exit(-1);
+		}
+		break;
 	case 'a':
 	    args.algorithm = optarg;                  
 	    if (!supportedAlg(args.algorithm)) {
diff --git a/src/dynamic/parser.h b/src/dynamic/parser.h
index 3472905..61f5747 100644
--- a/src/dynamic/parser.h
+++ b/src/dynamic/parser.h
@@ -7,6 +7,7 @@ struct cmd_args {
     int batch_size = 0;
     bool directed = false;
     bool weighted = false;
+    bool enDeleteEdges = false;
     int64_t num_nodes = 0;
     std::string filename;
     std::string type = "graphTango";
diff --git a/src/dynamic/traversal.h b/src/dynamic/traversal.h
index a9e385c..29ea32b 100644
--- a/src/dynamic/traversal.h
+++ b/src/dynamic/traversal.h
@@ -29,14 +29,19 @@ template<typename U>
 class neighborhood_iter<GraphTango<U>> {
 	friend class neighborhood<GraphTango<U>> ;
 private:
-	U* cursor;
+	u32 idx;		//position in the neighbour list; the only field operator!= compares
+	U* arr;			//neighbour array read when !isType3
+	U** blocks;		//current entry of the block list, read when isType3
+	bool isType3;
 public:
-	neighborhood_iter(U* _cursor) {
-		cursor = _cursor;
-	}
+	neighborhood_iter(u32 idx, U* arr, U** blocks, bool isType3) :
+			idx(idx),
+			arr(arr),
+			blocks(blocks),
+			isType3(isType3) { }
 
 	bool operator!=(const neighborhood_iter<GraphTango<U>> &it) {
-		return cursor != it.cursor;
+		return idx != it.idx;
 	}
 
 	neighborhood_iter& operator++() {
@@ -44,8 +49,11 @@ class neighborhood_iter<GraphTango<U>> {
 		#pragma omp atomic
 		g_edge_touched++;
 #endif
-
-		cursor++;
+		idx++;
+		if(isType3 && !(idx % EdgeArray<U>::BLOCK_SIZE)){
+			//step only, never load here: after a full last block this points one past the final entry
+			blocks++;
+		}
 		return *this;
 	}
 
@@ -54,16 +62,22 @@ class neighborhood_iter<GraphTango<U>> {
 		#pragma omp atomic
 		g_edge_touched++;
 #endif
-		cursor++;
+		idx++;
+		if(isType3 && !(idx % EdgeArray<U>::BLOCK_SIZE)){
+			//step only, never load here: after a full last block this points one past the final entry
+			blocks++;
+		}
 		return *this;
 	}
 
-	NodeID operator*() {
-		return cursor->getNodeID();
+	NodeID operator*() const {
+		U* cur = isType3 ? *blocks : arr;	//type 3 resolves the current block on access
+		return cur[idx % EdgeArray<U>::BLOCK_SIZE].getNodeID();
 	}
 
-	Weight extractWeight() {
-		return cursor->getWeight();
+	Weight extractWeight() const {
+		U* cur = isType3 ? *blocks : arr;	//type 3 resolves the current block on access
+		return cur[idx % EdgeArray<U>::BLOCK_SIZE].getWeight();
 	}
 };
 
@@ -782,39 +796,55 @@ class neighborhood<GraphTango<U>> {
 		|| defined(USE_GT_BALANCED_DYN_PARTITION)	\
 		|| defined(USE_GT_BALANCED_ABSEIL)			\
 		|| defined(USE_GT_BALANCED_RHH)				\
-		|| defined(USE_GT_BALANCED_TSL_RHH)
+		|| defined(USE_GT_BALANCED_TSL_RHH)			\
+		|| defined(USE_GT_LOAD_BALANCED)
 
 template<typename U>
 class neighborhood<GraphTango<U>> {
 private:
-	U* _start;
-	uint64_t _size;
+
+	uint64_t degree;
+	U* arr = nullptr;
+	U** blocks = nullptr;
+	bool isType3 = false;
+
+
 public:
 	neighborhood(NodeID _node, GraphTango<U> *_ds, bool _in_neigh) {
 		if(_in_neigh){
 			if(_ds->vArray[_node].inEdges.capacity <= EdgeArray<U>::TH0){
-				_start = _ds->vArray[_node].inEdges.etype.type1.neigh;
+				arr = _ds->vArray[_node].inEdges.etype.type1.neigh;
+			}
+			else if(_ds->vArray[_node].inEdges.capacity <= EdgeArray<U>::TH1){
+				arr = _ds->vArray[_node].inEdges.etype.type2.neighArr;
 			}
 			else{
-				_start = _ds->vArray[_node].inEdges.etype.type2_3.neighArr;
+				arr = _ds->vArray[_node].inEdges.etype.type3.blockList[0];
+				blocks = _ds->vArray[_node].inEdges.etype.type3.blockList.data();
+				isType3 = true;
 			}
-			_size = _ds->vArray[_node].inEdges.degree;
+			degree = _ds->vArray[_node].inEdges.degree;
 		}
 		else{
 			if(_ds->vArray[_node].outEdges.capacity <= EdgeArray<U>::TH0){
-				_start = _ds->vArray[_node].outEdges.etype.type1.neigh;
+				arr = _ds->vArray[_node].outEdges.etype.type1.neigh;
+			}
+			else if(_ds->vArray[_node].outEdges.capacity <= EdgeArray<U>::TH1){
+				arr = _ds->vArray[_node].outEdges.etype.type2.neighArr;
 			}
 			else{
-				_start = _ds->vArray[_node].outEdges.etype.type2_3.neighArr;
+				arr = _ds->vArray[_node].outEdges.etype.type3.blockList[0];
+				blocks = _ds->vArray[_node].outEdges.etype.type3.blockList.data();
+				isType3 = true;
 			}
-			_size = _ds->vArray[_node].outEdges.degree;
+			degree = _ds->vArray[_node].outEdges.degree;
 		}
 	}
-	neighborhood_iter<GraphTango<U>> begin() {
-		return neighborhood_iter<GraphTango<U>>(_start);
+	neighborhood_iter<GraphTango<U>> begin() const {
+		return neighborhood_iter<GraphTango<U>>(0, arr, blocks, isType3);
 	}
-	neighborhood_iter<GraphTango<U>> end() {
-		return neighborhood_iter<GraphTango<U>>(_start + _size);
+	neighborhood_iter<GraphTango<U>> end() const {
+		return neighborhood_iter<GraphTango<U>>(degree, arr, blocks, isType3);
 	}
 };