# .env.example — example environment configuration for the AR.IO node.
# Copy to .env and uncomment/adjust values as needed; all settings below are
# optional and show their default or an example value.
# Starting block height for node synchronization (0 = start from the beginning)
# START_HEIGHT=0
# Stop block height for node synchronization (Infinity = keep syncing until stopped)
# STOP_HEIGHT="Infinity"
# Arweave node to use for fetching data
# TRUSTED_NODE_URL="https://arweave.net"
# Arweave node to use for proxying requests
# TRUSTED_GATEWAY_URL="https://arweave.net"
# Trusted gateways configuration (JSON object with URL keys and priority/trust config)
# Simple format (implicitly trusted): TRUSTED_GATEWAYS_URLS='{"https://turbo-gateway.com": 1}'
# Extended format with trust control: TRUSTED_GATEWAYS_URLS='{"https://turbo-gateway.com": 1, "https://arweave.net": {"priority": 2, "trusted": false}}'
# TRUSTED_GATEWAYS_URLS='{"https://turbo-gateway.com": 1, "https://arweave.net": {"priority": 2, "trusted": false}}'
# Origins to block when forwarding to trusted gateways (comma-separated list)
# TRUSTED_GATEWAYS_BLOCKED_ORIGINS="self.gateway.com,loop.source.net"
# IPs and CIDR ranges to block when forwarding to trusted gateways (comma-separated list)
# TRUSTED_GATEWAYS_BLOCKED_IPS_AND_CIDRS="192.168.1.100/32,10.0.0.0/8,2001:db8::1"
# Client IPs/CIDRs that should only be served from local cache (comma-separated list).
# Requests from these IPs skip all upstream fetching on cache miss. Useful for HA peer
# setups where each node shares its cache but shouldn't do redundant upstream work.
# CACHE_ONLY_CLIENT_IPS_AND_CIDRS="10.0.1.11,10.0.1.12"
# ArNS gateway
# TRUSTED_ARNS_GATEWAY_URL="https://__NAME__.turbo-gateway.com"
# If true, skips the local cache and always fetches headers from the node
# SKIP_CACHE=false
# If true, skips the data cache (read-through data cache) and always fetches data from upstream
# SKIP_DATA_CACHE=false
# Adds an "INSTANCE_ID" field to output logs
# INSTANCE_ID=""
# Sets the format of output logs, accepts "simple" and "json"
# LOG_FORMAT="simple"
# AR.IO node exposed port number
# CORE_PORT=4000
# ENVOY_PORT=3000
# CLICKHOUSE_PORT=9000
# CLICKHOUSE_PORT_2=8123
# CLICKHOUSE_PORT_3=8443
# OBSERVER_PORT=5050
# Number from 0 to 1, representing the probability of a request failing
# SIMULATED_REQUEST_FAILURE_RATE=0
# Arweave wallet address used for staking and rewards
# AR_IO_WALLET=""
# Arweave wallet address used by the observer to upload report data and interact with the AR.IO process
# OBSERVER_WALLET=""
# Admin key value used for accessing the admin API
# ADMIN_API_KEY="secret"
# Alternatively, use a filepath to the admin key used for accessing the admin API.
# It takes precedence over ADMIN_API_KEY.
# ADMIN_API_KEY_FILE="/path/to/admin-key.txt"
# If true, ar.io node will start indexing missing bundles
# BACKFILL_BUNDLE_RECORDS=false
# If true, all indexed bundles will be reprocessed with the new filters (you can use this when you change the filters)
# FILTER_CHANGE_REPROCESS=false
# Only bundles compliant with this filter will be unbundled
# ANS104_UNBUNDLE_FILTER={"never": true}
# Only bundles compliant with this filter will be indexed
# ANS104_INDEX_FILTER={"never": true}
# Root host for ArNS
# ARNS_ROOT_HOST=""
# Protocol used when creating sandbox domains in ArNS (ARNS_ROOT_HOST needs to be set for this env to have any effect)
# SANDBOX_PROTOCOL=""
# If true, start indexing blocks, tx, ANS104 bundles
# START_WRITERS=true
# If true, the observer runs when using docker-compose
# RUN_OBSERVER=false
# Sets the target servers for webhooks (comma-separated list); webhooks are disabled if not set
# WEBHOOK_TARGET_SERVERS="http://localhost:3000,http://localhost:3001"
# Sets the webhook index filter, webhooks disabled if both index and block filters are not set
# WEBHOOK_INDEX_FILTER={"never": true}
# Sets the webhook block filter, webhooks disabled if both index and block filters are not set
# WEBHOOK_BLOCK_FILTER={"never": true}
# If true, the node will start indexing pending transactions from the mempool
# ENABLE_MEMPOOL_WATCHER=true
# Sets the mempool polling interval in milliseconds
# MEMPOOL_POLLING_INTERVAL_MS=30000
# Data source retrieval order for on-demand data requests (comma-separated list of sources)
# Available sources: 'trusted-gateways', 'ar-io-network', 'chunks-offset-aware', 'chunks-data-item' (deprecated), 'turbo', 'tx-data'
# ON_DEMAND_RETRIEVAL_ORDER="trusted-gateways,ar-io-network,chunks-offset-aware,tx-data"
# Enable external API searches (GraphQL/Turbo) for finding root transaction when local attributes are incomplete
# ENABLE_DATA_ITEM_ROOT_TX_SEARCH=true
# Allow data retrieval without offset information in offset-aware data sources
# ENABLE_PASSTHROUGH_WITHOUT_OFFSETS=true
# Chunk data retrieval order (comma-separated list of sources)
# Available sources: 'arweave-network', 'legacy-s3'
# CHUNK_DATA_RETRIEVAL_ORDER="arweave-network"
# Chunk metadata retrieval order (comma-separated list of sources)
# Available sources: 'arweave-network', 'legacy-psql'
# CHUNK_METADATA_RETRIEVAL_ORDER="arweave-network"
# Parallelism settings for chunk source operations
# CHUNK_DATA_SOURCE_PARALLELISM=1
# CHUNK_METADATA_SOURCE_PARALLELISM=1
# Preferred chunk GET nodes (comma-separated URLs)
# Defaults to data-1 through data-17 and tip-1 through tip-5.arweave.xyz:1984 if not set
# To override defaults, provide your own comma-separated list of URLs
# PREFERRED_CHUNK_GET_NODE_URLS=http://custom1.example.com:1984,http://custom2.example.com:1984
# Preferred chunk POST nodes (comma-separated URLs)
# Defaults to http://tip-2.arweave.xyz:1984 through http://tip-4.arweave.xyz:1984 if not set
# To override defaults, provide your own comma-separated list of URLs
# PREFERRED_CHUNK_POST_NODE_URLS=http://custom1.example.com:1984,http://custom2.example.com:1984
# Arweave POST dry-run mode (for testing without posting to chain)
# If true, simulates both transaction header and chunk submission without posting to Arweave
# Useful for testing apps like ArDrive and large uploads without burning AR tokens
# Returns 200 OK to clients as if transactions were successfully posted
# Works on both port 3000 (Envoy) and port 4000 (direct to core service)
# When enabled, Envoy routes POST /tx and POST /chunk to core for dry-run handling
# When disabled, Envoy routes POST /tx and POST /chunk to trusted Arweave nodes
# GET /tx requests are always proxied to trusted Arweave nodes for retrieving data
# ARWEAVE_POST_DRY_RUN=false
# Skip validation in dry-run mode (for faster testing)
# If true, skips transaction signature and chunk merkle proof validation
# Useful when you want maximum upload speed without verification overhead
# Only applies when ARWEAVE_POST_DRY_RUN=true
# ARWEAVE_POST_DRY_RUN_SKIP_VALIDATION=false
# Include transaction/data item tags as X-Arweave-Tag-* HTTP response headers
# on /raw/:id and /:id endpoints. Disabled by default.
# ARWEAVE_TAG_RESPONSE_HEADERS_ENABLED=true
# ARWEAVE_TAG_RESPONSE_HEADERS_MAX=100
# ARWEAVE_TAG_RESPONSE_HEADERS_MAX_BYTES=8192
# DNS resolution interval for preferred chunk nodes (in seconds)
# DNS resolution reduces lookup overhead by resolving hostnames to IPs on startup
# Applies to both GET and POST nodes
# Set to 0 to disable periodic re-resolution (default: 3600 - 1 hour)
# PREFERRED_CHUNK_NODE_DNS_RESOLUTION_INTERVAL_SECONDS=3600
# ==============================================================================
# ClickHouse Auto-Import Configuration (optional)
# ==============================================================================
# Sleep interval between import cycles (seconds, default: 3600)
# CLICKHOUSE_AUTO_IMPORT_SLEEP_INTERVAL=3600
# Height range to process in each batch (default: 10000)
# CLICKHOUSE_AUTO_IMPORT_HEIGHT_INTERVAL=10000
# Maximum rows per Parquet file (default: 1000000)
# CLICKHOUSE_AUTO_IMPORT_MAX_ROWS_PER_FILE=1000000
# Enable Apache Iceberg metadata generation (default: false)
# ENABLE_ICEBERG_GENERATION=false
# ==============================================================================
# Datasets Configuration
# ==============================================================================
# Enable datasets endpoint at /local/datasets (default: false)
# ENABLE_DATASETS_ENDPOINT=true
# Datasets proxy configuration (optional)
# By default, /local/datasets routes to the core service
# Set these to proxy to an external datasets service
# DATASETS_PROXY_HOST=datasets.example.com
# DATASETS_PROXY_PORT=8080
# ==============================================================================
# OpenTelemetry Configuration
# ==============================================================================
# Enable file-based export of OTEL spans for development/debugging
# When using 'yarn service:start', this defaults to 'true' (can be overridden here)
# When using 'yarn start', this defaults to 'false'
# OTEL_FILE_EXPORT_ENABLED=false
# Path to OTEL spans file (JSONL format)
# OTEL_FILE_EXPORT_PATH=logs/otel-spans.jsonl
# OTEL service name
# OTEL_SERVICE_NAME=ar-io-node
# OTEL tracing sampling rate (1/N spans will be sampled)
# OTEL_TRACING_SAMPLING_RATE_DENOMINATOR=1
# OTLP exporter endpoint (for production telemetry)
# If set, spans/logs will be sent to this endpoint via OTLP protocol
# OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io
# OTLP exporter headers (for authentication)
# OTEL_EXPORTER_OTLP_HEADERS=x-honeycomb-team=your-api-key
# Alternative: Load OTLP headers from file
# OTEL_EXPORTER_OTLP_HEADERS_FILE=/path/to/headers.txt
# OTLP batch log processor settings
# OTEL_BATCH_LOG_PROCESSOR_SCHEDULED_DELAY_MS=2000
# OTEL_BATCH_LOG_PROCESSOR_MAX_EXPORT_BATCH_SIZE=10000
# ------------------------------------------------------------------------------
# OTEL Collector Configuration (for docker-compose deployments)
# ------------------------------------------------------------------------------
# The OTEL Collector implements tail-based sampling to reduce telemetry costs
# by 80-95% while maintaining 100% visibility into errors, performance issues,
# and ALL paid traffic (x402 payments and paid rate limit tokens).
#
# Sampling Policies:
# - 100% of errors (5xx, exceptions)
# - 100% of slow requests (>2s configurable)
# - 100% of x402 paid requests (for billing/compliance)
# - 100% of paid token usage (for revenue tracking)
# - 1% of free-tier successful requests (for baseline metrics)
#
# Architecture: ar-io-node → otel-collector → final destination
#
# By default in docker-compose, traces are sent to the collector (otel-collector:4318)
# which applies intelligent sampling before forwarding to your telemetry backend.
# Final telemetry destination - set based on your backend:
# Honeycomb: https://api.honeycomb.io
# Grafana Cloud Tempo: https://otlp-gateway-prod-us-central-0.grafana.net/otlp
# Datadog: https://trace.agent.datadoghq.com
# New Relic: https://otlp.nr-data.net
# Elastic APM: https://your-deployment.apm.region.cloud.es.io
# OTEL_COLLECTOR_DESTINATION_ENDPOINT=https://api.honeycomb.io
# Backend API Keys - Configure ONE based on your telemetry backend
# Honeycomb API key
# OTEL_COLLECTOR_HONEYCOMB_API_KEY=your-honeycomb-api-key
# Grafana Cloud API key (base64 encoded instance_id:api_key)
# OTEL_COLLECTOR_GRAFANA_CLOUD_API_KEY=your-base64-encoded-key
# Datadog API key
# OTEL_COLLECTOR_DATADOG_API_KEY=your-datadog-api-key
# New Relic license key
# OTEL_COLLECTOR_NEW_RELIC_API_KEY=your-new-relic-license-key
# Elastic APM secret token
# OTEL_COLLECTOR_ELASTIC_API_KEY=your-elastic-secret-token
# Tail sampling: percentage of successful/fast/unpaid traces to sample (1-100)
# Default: 1 (captures 1% of successful requests for baseline metrics)
# OTEL_TAIL_SAMPLING_SUCCESS_RATE=1
# Tail sampling: latency threshold in milliseconds for slow request capture
# Default: 2000 (2 seconds)
# OTEL_TAIL_SAMPLING_SLOW_THRESHOLD_MS=2000
# Tail sampling: percentage of error traces (5xx status codes) to sample (1-100)
# Default: 100 (captures all errors for debugging)
# Reduce only if you have very high error volumes and want to reduce telemetry costs
# OTEL_TAIL_SAMPLING_ERROR_RATE=100
# Tail sampling: percentage of slow request traces to sample (1-100)
# Default: 100 (captures all slow requests for performance analysis)
# Reduce only if you have many slow requests and want to reduce telemetry costs
# OTEL_TAIL_SAMPLING_SLOW_RATE=100
# Tail sampling: percentage of paid traffic traces (x402 verified payments) to sample (1-100)
# Default: 100 (captures all paid traffic for billing/compliance)
# Reduce only if you have very high paid traffic volumes
# OTEL_TAIL_SAMPLING_PAID_TRAFFIC_RATE=100
# Tail sampling: percentage of paid rate limit token usage traces to sample (1-100)
# Default: 100 (captures all paid token usage for billing/compliance)
# Reduce only if you have very high paid token usage volumes
# OTEL_TAIL_SAMPLING_PAID_TOKENS_RATE=100
# OTEL Collector image tag
# OTEL_COLLECTOR_IMAGE_TAG=0.119.0