Skip to content

Commit 1f8a223

Browse files
Merge pull request #53 from NYPL/s3-update
Allow uploading arbitrary files to s3
2 parents 071ebcf + 82ab5dd commit 1f8a223

File tree

5 files changed

+74
-33
lines changed

5 files changed

+74
-33
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
# Changelog
2+
## v1.9.0 3/10/26
3+
- Add capability to upload arbitrary file types to S3
4+
25
## v1.8.0 8/19/25
36
- Add optional JSON structured logging
47

pyproject.toml

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "nypl_py_utils"
7-
version = "1.8.0"
7+
version = "1.9.0"
88
authors = [
99
{ name="Aaron Friedman", email="aaronfriedman@nypl.org" },
1010
]
@@ -24,64 +24,76 @@ dependencies = []
2424

2525
[project.optional-dependencies]
2626
avro-client = [
27+
"nypl_py_utils[log-helper]",
2728
"fastavro>=1.11.1",
2829
"requests>=2.28.1"
2930
]
3031
cloudlibrary-client = [
32+
"nypl_py_utils[log-helper]",
3133
"requests>=2.28.1"
3234
]
3335
kinesis-client = [
36+
"nypl_py_utils[log-helper]",
3437
"boto3>=1.26.5",
3538
"botocore>=1.29.5"
3639
]
3740
kms-client = [
41+
"nypl_py_utils[log-helper]",
3842
"boto3>=1.26.5",
3943
"botocore>=1.29.5"
4044
]
41-
log_helper = [
42-
"structlog>=25.4.0"
43-
]
4445
mysql-client = [
46+
"nypl_py_utils[log-helper]",
4547
"mysql-connector-python>=8.0.32"
4648
]
4749
oauth2-api-client = [
50+
"nypl_py_utils[log-helper]",
4851
"oauthlib>=3.2.2",
4952
"requests_oauthlib>=1.3.1"
5053
]
5154
postgresql-client = [
55+
"nypl_py_utils[log-helper]",
5256
"psycopg[binary]>=3.1.6"
5357
]
5458
redshift-client = [
59+
"nypl_py_utils[log-helper]",
5560
"botocore>=1.29.5",
5661
"redshift-connector>=2.0.909"
5762
]
5863
s3-client = [
64+
"nypl_py_utils[log-helper]",
5965
"boto3>=1.26.5",
6066
"botocore>=1.29.5"
6167
]
6268
secrets-manager-client = [
69+
"nypl_py_utils[log-helper]",
6370
"boto3>=1.26.5",
6471
"botocore>=1.29.5"
6572
]
6673
sftp-client = [
74+
"nypl_py_utils[log-helper]",
6775
"paramiko>=3.4.1"
6876
]
6977
config-helper = [
70-
"nypl_py_utils[kms-client]",
78+
"nypl_py_utils[kms-client,log-helper]",
7179
"PyYAML>=6.0"
7280
]
81+
log-helper = [
82+
"structlog>=25.5.0"
83+
]
7384
obfuscation-helper = [
85+
"nypl_py_utils[log-helper]",
7486
"bcrypt>=4.0.1"
7587
]
7688
patron-data-helper = [
77-
"nypl_py_utils[postgresql-client,redshift-client]>=1.1.5",
89+
"nypl_py_utils[postgresql-client,redshift-client,log-helper]>=1.1.5",
7890
"pandas>=2.2.2"
7991
]
8092
research-catalog-identifier-helper = [
8193
"requests>=2.28.1"
8294
]
8395
development = [
84-
"nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper,log_helper]",
96+
"nypl_py_utils[avro-client,kinesis-client,kms-client,mysql-client,oauth2-api-client,postgresql-client,redshift-client,s3-client,secrets-manager-client,sftp-client,config-helper,log-helper,obfuscation-helper,patron-data-helper,research-catalog-identifier-helper]",
8597
"flake8>=6.0.0",
8698
"freezegun>=1.2.2",
8799
"mock>=4.0.3",

src/nypl_py_utils/classes/s3_client.py

Lines changed: 44 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ class S3Client:
1111
"""
1212
Client for fetching and setting an AWS S3 file.
1313
14-
Takes as input the name of the S3 bucket and resource to be fetched/set.
14+
Takes as input the name of the S3 bucket. If fetching/setting a cache, also
15+
takes the cached resource.
1516
"""
1617

17-
def __init__(self, bucket, resource):
18+
def __init__(self, bucket, resource=None):
1819
self.logger = create_log('s3_client')
1920
self.bucket = bucket
2021
self.resource = resource
@@ -23,49 +24,68 @@ def __init__(self, bucket, resource):
2324
self.s3_client = boto3.client(
2425
's3', region_name=os.environ.get('AWS_REGION', 'us-east-1'))
2526
except ClientError as e:
26-
self.logger.error(
27-
'Could not create S3 client: {err}'.format(err=e))
28-
raise S3ClientError(
29-
'Could not create S3 client: {err}'.format(err=e)) from None
27+
error_msg = f'Could not create S3 client: {e}'
28+
self.logger.error(error_msg)
29+
raise S3ClientError(error_msg) from None
3030

3131
def close(self):
3232
self.s3_client.close()
3333

3434
def fetch_cache(self):
3535
"""Fetches a JSON file from S3 and returns the resulting dictionary"""
36-
self.logger.info('Fetching {file} from S3 bucket {bucket}'.format(
37-
file=self.resource, bucket=self.bucket))
36+
self.logger.info(
37+
f'Fetching {self.resource} from S3 bucket {self.bucket}')
3838
try:
3939
output_stream = BytesIO()
4040
self.s3_client.download_fileobj(
4141
self.bucket, self.resource, output_stream)
4242
return json.loads(output_stream.getvalue())
4343
except ClientError as e:
44-
self.logger.error(
45-
'Error retrieving {file} from S3 bucket {bucket}: {error}'
46-
.format(file=self.resource, bucket=self.bucket, error=e))
47-
raise S3ClientError(
48-
'Error retrieving {file} from S3 bucket {bucket}: {error}'
49-
.format(file=self.resource, bucket=self.bucket, error=e)
50-
) from None
44+
error_msg = (
45+
f'Error retrieving {self.resource} from S3 bucket '
46+
f'{self.bucket}: {e}')
47+
self.logger.error(error_msg)
48+
raise S3ClientError(error_msg) from None
5149

5250
def set_cache(self, state):
5351
"""Writes a dictionary to JSON and uploads the resulting file to S3"""
5452
self.logger.info(
55-
'Setting {file} in S3 bucket {bucket} to {state}'.format(
56-
file=self.resource, bucket=self.bucket, state=state))
53+
f'Setting {self.resource} in S3 bucket {self.bucket} to {state}')
5754
try:
5855
input_stream = BytesIO(json.dumps(state).encode())
5956
self.s3_client.upload_fileobj(
6057
input_stream, self.bucket, self.resource)
6158
except ClientError as e:
62-
self.logger.error(
63-
'Error uploading {file} to S3 bucket {bucket}: {error}'
64-
.format(file=self.resource, bucket=self.bucket, error=e))
65-
raise S3ClientError(
66-
'Error uploading {file} to S3 bucket {bucket}: {error}'
67-
.format(file=self.resource, bucket=self.bucket, error=e)
68-
) from None
59+
error_msg = (
60+
f'Error uploading {self.resource} to S3 bucket '
61+
f'{self.bucket}: {e}')
62+
self.logger.error(error_msg)
63+
raise S3ClientError(error_msg) from None
64+
65+
def upload_file(self, content, file_path):
66+
"""
67+
Writes an arbitrary file to S3. Note that this will overwrite any
68+
existing file with the same name.
69+
70+
Parameters
71+
----------
72+
content: str
73+
The string that should be written to the file. Encoded as UTF-8.
74+
file_path: str
75+
The full path of the file that should be written not including the
76+
bucket. Example: "subdirectory/example_file.csv"
77+
"""
78+
self.logger.info(
79+
f'Writing {file_path} in S3 bucket {self.bucket}')
80+
try:
81+
input_stream = BytesIO(content.encode())
82+
self.s3_client.upload_fileobj(input_stream, self.bucket, file_path)
83+
except ClientError as e:
84+
error_msg = (
85+
f'Error uploading {file_path} to S3 bucket '
86+
f'{self.bucket}: {e}')
87+
self.logger.error(error_msg)
88+
raise S3ClientError(error_msg) from None
6989

7090

7191
class S3ClientError(Exception):

src/nypl_py_utils/functions/log_helper.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
import structlog
2-
31
import logging
42
import os
3+
import structlog
54
import sys
65

76
levels = {

tests/test_s3_client.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,10 @@ def test_set_cache(self, test_instance):
2828
assert arguments[0].getvalue() == json.dumps(_TEST_STATE).encode()
2929
assert arguments[1] == 'test_s3_bucket'
3030
assert arguments[2] == 'test_s3_resource'
31+
32+
def test_upload_file(self, test_instance):
33+
test_instance.upload_file('test_content', 'test_filename.txt')
34+
arguments = test_instance.s3_client.upload_fileobj.call_args.args
35+
assert arguments[0].getvalue() == b'test_content'
36+
assert arguments[1] == 'test_s3_bucket'
37+
assert arguments[2] == 'test_filename.txt'

0 commit comments

Comments
 (0)