Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 0 additions & 19 deletions AGENTS.md

This file was deleted.

31 changes: 31 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Documentation for AI Coding Assistants

## Commits

Each commit should do exactly one thing so that its diff is easy to
review. If a task involves multiple changes, split them into separate
commits. For example, whenever code is moved and changed, or a file is
renamed and changed, do the move or the rename in one commit and make
the changes in another. If files need to be reformatted with ruff,
reformat them and commit that separately before making code changes.

## Commands

The `commcare-export` codebase uses a virtualenv managed by uv. Prefix
commands with `uv run ...` to run them in the virtualenv.

* Run tests: `uv run pytest -m "not dbtest" [path/to/file.py::Class::method]`
* Check typing: `uv run mypy commcare_export/ tests/`
* Check linting: `uv run ruff check`
* Format: `uv run ruff format <path/to/file.py>`
* Sort imports: `uv run ruff check --select I --fix <path/to/file.py>`


## Tech Stack

See @pyproject.toml


## Coding Style

See @CONTRIBUTING.md
101 changes: 56 additions & 45 deletions commcare_export/writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class TableWriter:
If the implementing class does not need any setup, no-op defaults
have been provided.
"""

support_checkpoints = False

# set to False if writer does not support writing to the same table
Expand Down Expand Up @@ -121,8 +122,8 @@ def __init__(self, file):
except ImportError:
raise Exception(
"It doesn't look like this machine is configured for "
"Excel export. To export to Excel you have to run the "
"command: pip install openpyxl"
'Excel export. To export to Excel you have to run the '
'command: pip install openpyxl'
)

self.file = file
Expand All @@ -141,7 +142,7 @@ def get_sheet(self, table):
name = table.name
if name not in self.sheets:
sheet = self.book.create_sheet()
sheet.title = name[:self.max_table_name_size]
sheet.title = name[: self.max_table_name_size]
sheet.append([ensure_text(v) for v in table.headings])
self.sheets[name] = sheet

Expand All @@ -160,8 +161,8 @@ def __init__(self, file):
except ImportError:
raise Exception(
"It doesn't look like this machine is configured for "
"excel export. To export to excel you have to run the "
"command: pip install xlwt"
'excel export. To export to excel you have to run the '
'command: pip install xlwt'
)

self.file = file
Expand All @@ -183,8 +184,8 @@ def write_table(self, table):
def get_sheet(self, table):
name = table.name
if name not in self.sheets:
sheet = self.book.add_sheet(name[:self.max_table_name_size])
sheet.title = name[:self.max_table_name_size]
sheet = self.book.add_sheet(name[: self.max_table_name_size])
sheet.title = name[: self.max_table_name_size]

for colnum, val in enumerate(table.headings):
sheet.write(0, colnum, ensure_text(val))
Expand Down Expand Up @@ -215,16 +216,17 @@ def write_table(self, table):
else:
assert self.tables[table.name].headings == list(table.headings)

self.tables[table.name].rows = list(
self.tables[table.name].rows
) + [[to_jvalue(v) for v in row] for row in table.rows]
self.tables[table.name].rows = list(self.tables[table.name].rows) + [
[to_jvalue(v) for v in row] for row in table.rows
]


class StreamingMarkdownTableWriter(TableWriter):
"""
Writes markdown to an output stream, where each table just comes one
after the other
"""

supports_multi_table_write = False

def __init__(self, output_stream, compute_widths=False):
Expand All @@ -235,9 +237,9 @@ def write_table(self, table):
col_widths = None
if self.compute_widths:
col_widths = self._get_column_widths(table)
row_template = ' | '.join([
f'{{:<{width}}}' for width in col_widths
])
row_template = ' | '.join(
[f'{{:<{width}}}' for width in col_widths]
)
else:
row_template = ' | '.join(['{}'] * len(table.headings))

Expand All @@ -254,9 +256,7 @@ def write_table(self, table):

for row in table.rows:
text_row = (ensure_text(val, convert_none=True) for val in row)
self.output_stream.write(
f'| {row_template.format(*text_row)} |\n'
)
self.output_stream.write(f'| {row_template.format(*text_row)} |\n')

def _get_column_widths(self, table):
all_rows = [table.headings] + list(table.rows)
Expand Down Expand Up @@ -320,7 +320,7 @@ def max_column_length(self):
return 128
if self.is_oracle:
return 128
raise Exception(f"Unknown database dialect: {self.db_url}")
raise Exception(f'Unknown database dialect: {self.db_url}')

@property
def metadata(self):
Expand Down Expand Up @@ -357,6 +357,7 @@ class SqlTableWriter(SqlMixin, TableWriter):
Write tables to a database specified by URL
(TODO) with "upsert" based on primary key.
"""

support_checkpoints = True
required_columns = ['id']

Expand All @@ -374,9 +375,7 @@ def get_explicit_type(self, data_type):
return get_sqlalchemy_type(data_type)
except UnknownDataType:
if data_type:
logger.warning(
f"Found unknown data type '{data_type}'"
)
logger.warning(f"Found unknown data type '{data_type}'")
return self.best_type_for('') # todo: more explicit fallback

def best_type_for(self, val):
Expand Down Expand Up @@ -404,7 +403,7 @@ def best_type_for(self, val):
if len(val) < self.MAX_VARCHAR_LEN:
return sqlalchemy.Unicode(
max(len(val), self.MIN_VARCHAR_LEN),
collation=self.collation
collation=self.collation,
)
else:
return sqlalchemy.UnicodeText(collation=self.collation)
Expand All @@ -424,7 +423,7 @@ def best_type_for(self, val):
elif self.is_oracle:
return sqlalchemy.Unicode(4000, collation=self.collation)
else:
raise Exception(f"Unknown database dialect: {self.db_url}")
raise Exception(f'Unknown database dialect: {self.db_url}')
else:
# We do not have a name for "bottom" in SQL aka the type
# whose least upper bound with any other type is the other
Expand Down Expand Up @@ -453,22 +452,30 @@ def compatible(self, source_type, dest_type):
compatibility = {
sqlalchemy.String: (sqlalchemy.Text,),
sqlalchemy.Integer: (sqlalchemy.String, sqlalchemy.Text),
sqlalchemy.Boolean:
(sqlalchemy.String, sqlalchemy.Text, sqlalchemy.Integer),
sqlalchemy.DateTime:
(sqlalchemy.String, sqlalchemy.Text, sqlalchemy.Date),
sqlalchemy.Boolean: (
sqlalchemy.String,
sqlalchemy.Text,
sqlalchemy.Integer,
),
sqlalchemy.DateTime: (
sqlalchemy.String,
sqlalchemy.Text,
sqlalchemy.Date,
),
sqlalchemy.Date: (sqlalchemy.String, sqlalchemy.Text),
}

# add dialect specific types
try:
compatibility[sqlalchemy.JSON
] = (sqlalchemy.dialects.postgresql.json.JSON,)
compatibility[sqlalchemy.JSON] = (
sqlalchemy.dialects.postgresql.json.JSON,
)
except AttributeError:
pass
try:
compatibility[sqlalchemy.Boolean
] += (sqlalchemy.dialects.mssql.base.BIT,)
compatibility[sqlalchemy.Boolean] += (
sqlalchemy.dialects.mssql.base.BIT,
)
except AttributeError:
pass

Expand Down Expand Up @@ -517,7 +524,7 @@ def make_table_compatible(self, table, row_dict, data_type_dict):
)
op.add_column(
table.name,
sqlalchemy.Column(column, val_type, nullable=True)
sqlalchemy.Column(column, val_type, nullable=True),
)
self.metadata.clear()
table = self.get_table(table.name)
Expand Down Expand Up @@ -551,26 +558,26 @@ def create_table(self, table_name, row_dict, data_type_dict):
sqlalchemy.Table(
table_name,
sqlalchemy.MetaData(),
*self._get_columns_for_data(row_dict, data_type_dict)
*self._get_columns_for_data(row_dict, data_type_dict),
)
).compile(self.connection.engine)
logger.warning(
f"Table '{table_name}' does not exist. Creating table "
f"with:\n{create_sql}"
f'with:\n{create_sql}'
)
empty_cols = [
name for (name, val) in row_dict.items()
name
for (name, val) in row_dict.items()
if val is None and name not in data_type_dict
]
if empty_cols:
logger.warning(
"This schema does not include the following columns "
"since we are unable to determine the column type at "
f"this stage: {empty_cols}"
'This schema does not include the following columns '
'since we are unable to determine the column type at '
f'this stage: {empty_cols}'
)
op.create_table(
table_name,
*self._get_columns_for_data(row_dict, data_type_dict)
table_name, *self._get_columns_for_data(row_dict, data_type_dict)
)
self.metadata.clear()
return self.get_table(table_name)
Expand All @@ -591,9 +598,11 @@ def upsert(self, table, row_dict):
insert = table.insert().values(**row_dict)
self.connection.execute(insert)
except sqlalchemy.exc.IntegrityError:
update = (table.update()
.where(table.c.id == row_dict['id'])
.values(**row_dict))
update = (
table.update()
.where(table.c.id == row_dict['id'])
.values(**row_dict)
)
self.connection.execute(update)

def write_table(self, table_spec: TableSpec) -> None:
Expand Down Expand Up @@ -621,9 +630,11 @@ def _get_columns_for_data(self, row_dict, data_type_dict):
sqlalchemy.Column(
column_name,
self.get_data_type(data_type_dict[column_name], val),
nullable=True
nullable=True,
)
for (column_name, val) in row_dict.items()
if ((val is not None or data_type_dict[column_name])
and column_name != 'id')
if (
(val is not None or data_type_dict[column_name])
and column_name != 'id'
)
]
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,9 @@ local_scheme = "no-local-version"
testpaths = ["tests"]
python_files = ["test_*.py"]
addopts = ["-vv", "--tb=short"]

[tool.ruff]
line-length = 79

[tool.ruff.format]
quote-style = "single"
Loading
Loading