Skip to content

Commit cf7a177

Browse files
authored
Merge pull request #58 from james-willis/feat/store-options-v2
feat: add options support to Store for format-specific write options
2 parents 88f7474 + 65a8711 commit cf7a177

File tree

5 files changed: 187 additions and 7 deletions.

README.md

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -99,6 +99,27 @@ The `Store` class supports the following options:
9999
Use `Store.for_download()` as a convenient shorthand for storing results
100100
as a single Parquet file with a presigned URL.
101101

102+
#### Store options
103+
104+
You can pass format-specific Spark write options through the `options`
105+
parameter. These correspond to the options available in Spark's
106+
`DataFrameWriter` and are applied after the server's default options,
107+
allowing you to override them.
108+
109+
```python
110+
# CSV without headers and a custom delimiter
111+
store = Store.for_download(
112+
format=StorageFormat.CSV,
113+
options={"header": "false", "delimiter": "|"},
114+
)
115+
116+
# GeoJSON preserving null fields
117+
store = Store.for_download(
118+
format=StorageFormat.GEOJSON,
119+
options={"ignoreNullFields": "false"},
120+
)
121+
```
122+
102123
### Execution progress
103124

104125
You can monitor the progress of running queries by registering a

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "wherobots-python-dbapi"
3-
version = "0.24.0"
3+
version = "0.25.0"
44
description = "Python DB-API driver for Wherobots DB"
55
authors = [{ name = "Maxime Petazzoni", email = "max@wherobots.com" }]
66
requires-python = ">=3.10, <4"

tests/test_store_options.py

Lines changed: 130 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,130 @@
1+
"""Tests for Store options support."""
2+
3+
import json
4+
5+
from wherobots.db.models import Store
6+
from wherobots.db.types import StorageFormat
7+
8+
9+
class TestStoreOptions:
10+
"""Tests for the options field on Store."""
11+
12+
def test_default_options_is_none(self):
13+
store = Store(format=StorageFormat.PARQUET)
14+
assert store.options is None
15+
16+
def test_options_set(self):
17+
store = Store(
18+
format=StorageFormat.CSV,
19+
options={"header": "false", "delimiter": "|"},
20+
)
21+
assert store.options == {"header": "false", "delimiter": "|"}
22+
23+
def test_empty_options_normalized_to_none(self):
24+
store = Store(format=StorageFormat.PARQUET, options={})
25+
assert store.options is None
26+
27+
def test_none_options_stays_none(self):
28+
store = Store(format=StorageFormat.PARQUET, options=None)
29+
assert store.options is None
30+
31+
def test_options_defensively_copied(self):
32+
original = {"header": "false"}
33+
store = Store(format=StorageFormat.CSV, options=original)
34+
# Mutating the original should not affect the store
35+
original["delimiter"] = "|"
36+
assert "delimiter" not in store.options
37+
38+
39+
class TestStoreForDownloadWithOptions:
40+
"""Tests for Store.for_download() with options parameter."""
41+
42+
def test_for_download_default_no_options(self):
43+
store = Store.for_download()
44+
assert store.options is None
45+
46+
def test_for_download_with_options(self):
47+
store = Store.for_download(options={"header": "false"})
48+
assert store.options == {"header": "false"}
49+
assert store.single is True
50+
assert store.generate_presigned_url is True
51+
52+
def test_for_download_with_format_and_options(self):
53+
store = Store.for_download(
54+
format=StorageFormat.CSV,
55+
options={"header": "false", "delimiter": "|"},
56+
)
57+
assert store.format == StorageFormat.CSV
58+
assert store.options == {"header": "false", "delimiter": "|"}
59+
60+
def test_for_download_empty_options_normalized(self):
61+
store = Store.for_download(options={})
62+
assert store.options is None
63+
64+
65+
class TestStoreSerializationWithOptions:
66+
"""Tests for Store.to_dict() serialization."""
67+
68+
def test_serialize_without_options(self):
69+
store = Store.for_download(format=StorageFormat.GEOJSON)
70+
d = store.to_dict()
71+
assert d == {
72+
"format": "geojson",
73+
"single": "true",
74+
"generate_presigned_url": "true",
75+
}
76+
assert "options" not in d
77+
78+
def test_serialize_with_options(self):
79+
store = Store.for_download(
80+
format=StorageFormat.CSV,
81+
options={"header": "false", "delimiter": "|"},
82+
)
83+
d = store.to_dict()
84+
assert d == {
85+
"format": "csv",
86+
"single": "true",
87+
"generate_presigned_url": "true",
88+
"options": {"header": "false", "delimiter": "|"},
89+
}
90+
91+
def test_serialize_empty_options_omitted(self):
92+
store = Store(format=StorageFormat.PARQUET, options={})
93+
d = store.to_dict()
94+
assert "options" not in d
95+
96+
def test_json_roundtrip_with_options(self):
97+
store = Store.for_download(
98+
format=StorageFormat.GEOJSON,
99+
options={"ignoreNullFields": "false"},
100+
)
101+
d = store.to_dict()
102+
payload = json.dumps(d)
103+
parsed = json.loads(payload)
104+
assert parsed["options"] == {"ignoreNullFields": "false"}
105+
106+
def test_full_request_shape(self):
107+
"""Verify the full execute_sql request dict shape with store options."""
108+
store = Store.for_download(
109+
format=StorageFormat.CSV,
110+
options={"header": "false"},
111+
)
112+
request = {
113+
"kind": "execute_sql",
114+
"execution_id": "test-id",
115+
"statement": "SELECT 1",
116+
}
117+
store_dict = store.to_dict()
118+
request["store"] = store_dict
119+
120+
assert request == {
121+
"kind": "execute_sql",
122+
"execution_id": "test-id",
123+
"statement": "SELECT 1",
124+
"store": {
125+
"format": "csv",
126+
"single": "true",
127+
"generate_presigned_url": "true",
128+
"options": {"header": "false"},
129+
},
130+
}

wherobots/db/connection.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -279,11 +279,7 @@ def __execute_sql(
279279
request["enable_progress_events"] = True
280280

281281
if store:
282-
request["store"] = {
283-
"format": store.format.value,
284-
"single": str(store.single).lower(),
285-
"generate_presigned_url": str(store.generate_presigned_url).lower(),
286-
}
282+
request["store"] = store.to_dict()
287283

288284
self.__queries[execution_id] = Query(
289285
sql=sql,

wherobots/db/models.py

Lines changed: 34 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from dataclasses import dataclass
2+
from typing import Any, Dict
23

34
import pandas
45

@@ -31,25 +32,40 @@ class Store:
3132
single: If True, store as a single file. If False, store as multiple files.
3233
generate_presigned_url: If True, generate a presigned URL for the result.
3334
Requires single=True.
35+
options: Optional dict of format-specific Spark DataFrameWriter options
36+
(e.g. ``{"header": "false", "delimiter": "|"}`` for CSV). These are
37+
applied after the server's default options, so they can override them.
38+
An empty dict is normalized to None.
3439
"""
3540

3641
format: StorageFormat
3742
single: bool = False
3843
generate_presigned_url: bool = False
44+
options: dict[str, str] | None = None
3945

4046
def __post_init__(self) -> None:
4147
if self.generate_presigned_url and not self.single:
4248
raise ValueError("Presigned URL can only be generated when single=True")
49+
# Normalize empty options to None and defensively copy.
50+
if self.options:
51+
self.options = dict(self.options)
52+
else:
53+
self.options = None
4354

4455
@classmethod
45-
def for_download(cls, format: StorageFormat | None = None) -> "Store":
56+
def for_download(
57+
cls,
58+
format: StorageFormat | None = None,
59+
options: dict[str, str] | None = None,
60+
) -> "Store":
4661
"""Create a configuration for downloading results via a presigned URL.
4762
4863
This is a convenience method that creates a configuration with
4964
single file mode and presigned URL generation enabled.
5065
5166
Args:
5267
format: The storage format.
68+
options: Optional format-specific Spark DataFrameWriter options.
5369
5470
Returns:
5571
A Store configured for single-file download with presigned URL.
@@ -58,8 +74,25 @@ def for_download(cls, format: StorageFormat | None = None) -> "Store":
5874
format=format or DEFAULT_STORAGE_FORMAT,
5975
single=True,
6076
generate_presigned_url=True,
77+
options=options,
6178
)
6279

80+
def to_dict(self) -> Dict[str, Any]:
81+
"""Serialize this Store to a dict for the WebSocket request.
82+
83+
Returns a dict suitable for inclusion as the ``"store"`` field in an
84+
``execute_sql`` request. The ``options`` key is omitted when there
85+
are no user-supplied options (backward compatible).
86+
"""
87+
d: Dict[str, Any] = {
88+
"format": self.format.value,
89+
"single": str(self.single).lower(),
90+
"generate_presigned_url": str(self.generate_presigned_url).lower(),
91+
}
92+
if self.options:
93+
d["options"] = self.options
94+
return d
95+
6396

6497
@dataclass
6598
class ExecutionResult:

0 commit comments

Comments
 (0)