Skip to content

Commit d7afd76

Browse files
EarthDaily Python Client Release - 1.12.0 (#229)
1 parent b81fa3d commit d7afd76

20 files changed

+1602
-115
lines changed

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,24 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [1.12.0]
8+
9+
### Added
10+
11+
- Download resolvers for EarthData (NASA), EUMETSAT, and S3 asset protocols
12+
- `EarthDataResolver` for NASA EarthData URLs with JWT token authentication
13+
- `EUMETSATResolver` for EUMETSAT Data Store URLs with OAuth2 client credentials
14+
- `S3Resolver` for `s3://` downloads via boto3 with `file:local_path` support and full S3 key path preservation
15+
- `EDSConfig` fields for third-party credentials: `earthdata_token`, `eumetsat_client_id`, `eumetsat_client_secret`
16+
- `download` optional dependency group (`pip install 'earthdaily[download]'`) for boto3-based S3 downloads
17+
- Progress bar (`tqdm`) support for S3 downloads
18+
19+
### Changed
20+
21+
- `ResolverRegistry.get_resolver()` now returns `None` instead of a `DefaultResolver` when no configured resolver matches
22+
- `get_resolver_for_url()` performs URL-based auto-discovery across known resolver classes before falling back to `DefaultResolver`
23+
- Non-HTTP URLs (e.g. `s3://`) no longer attempt a pointless HTTP fallback when direct download fails
24+
725
## [1.11.1]
826

927
### Changed

earthdaily/_eds_config.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,16 @@ class EDSConfig:
7676
- backoff_factor=0.5: delays of 0.5s, 1s, 2s for retries 1, 2, 3
7777
- backoff_factor=2.0: delays of 2s, 4s, 8s for retries 1, 2, 3
7878
79+
earthdata_token: str, optional
80+
NASA EarthData JWT token for downloading EarthData assets.
81+
If not provided, it defaults to `EARTHDATA_TOKEN` from environment variables.
82+
eumetsat_client_id: str, optional
83+
EUMETSAT API client ID for downloading EUMETSAT assets.
84+
If not provided, it defaults to `EUMETSAT_CLIENT_ID` from environment variables.
85+
eumetsat_client_secret: str, optional
86+
EUMETSAT API client secret for downloading EUMETSAT assets.
87+
If not provided, it defaults to `EUMETSAT_CLIENT_SECRET` from environment variables.
88+
7989
Raises:
8090
-------
8191
ValueError
@@ -98,6 +108,10 @@ class EDSConfig:
98108
max_retries: int = 3
99109
retry_backoff_factor: float = 1.0
100110

111+
earthdata_token: str = field(default_factory=lambda: os.getenv("EARTHDATA_TOKEN", ""))
112+
eumetsat_client_id: str = field(default_factory=lambda: os.getenv("EUMETSAT_CLIENT_ID", ""))
113+
eumetsat_client_secret: str = field(default_factory=lambda: os.getenv("EUMETSAT_CLIENT_SECRET", ""))
114+
101115
def __post_init__(self):
102116
"""Validate that required fields are provided and raise errors if not."""
103117
if self.json_path:

earthdaily/_eds_logging.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ def _initialize_logger(self) -> logging.Logger:
3131
"""
3232
logger = logging.getLogger(self.logger_name)
3333
logger.setLevel(self.log_level)
34+
logger.propagate = False
3435

3536
if not logger.hasHandlers():
3637
console_handler = logging.StreamHandler()

earthdaily/platform/__init__.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def __init__(self, api_requester: APIRequester, asset_access_mode: AssetAccessMo
4040
api_requester : APIRequester
4141
An instance of APIRequester used to send HTTP requests to the EDS API.
4242
asset_access_mode : AssetAccessMode
43-
The mode of access for assets. Defaults to AssetAccessMode.PRESIGNED_URLS.
43+
The mode of access for assets.
4444
"""
4545
self.api_requester = api_requester
4646
self.bulk_search = BulkSearchService(api_requester)
@@ -55,7 +55,14 @@ def __init__(self, api_requester: APIRequester, asset_access_mode: AssetAccessMo
5555
**api_requester.headers,
5656
}
5757

58-
self.pystac_client = self._create_pystac_client()
58+
self._pystac_client: Optional[Client] = None
59+
60+
@property
def pystac_client(self) -> Client:
    """Return the pystac Client, building and caching it on first access.

    Creation is deferred so that ``__init__`` does not make the HTTP call
    that building the client entails.
    """
    client = self._pystac_client
    if client is None:
        # First access: build the client once and remember it for later calls.
        client = self._create_pystac_client()
        self._pystac_client = client
    return client
5966

6067
def _create_pystac_client(self) -> Client:
6168
"""Create a new pystac Client instance with configured retry and fresh auth token."""
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
from __future__ import annotations
2+
3+
import base64
4+
import json
5+
import logging
6+
import re
7+
import time
8+
from datetime import datetime, timedelta, timezone
9+
from os import makedirs
10+
from pathlib import Path
11+
from typing import Any
12+
from urllib.parse import urlparse
13+
14+
import requests
15+
from requests.auth import HTTPBasicAuth
16+
from tqdm import tqdm
17+
18+
from earthdaily.platform._resolver_base import AssetResolver, DomainPatternResolver
19+
20+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Hostnames that are escaped and OR-ed together to form
# ``EarthDataResolver.domain_pattern``.
_EARTHDATA_DOMAINS = [
    "data.lpdaac.earthdatacloud.nasa.gov",
    "data.laadsdaac.earthdatacloud.nasa.gov",
    "cmr.earthdata.nasa.gov",
    "ladsweb.modaps.eosdis.nasa.gov",
    "data.nsidc.earthdatacloud.nasa.gov",
]

# Matched against the network location of ``https`` URLs by
# ``S3Resolver.can_handle``. NOTE(review): the pattern is not anchored at the
# end and the middle group accepts any text, so it matches every
# ``*.amazonaws.com`` prefix, not only S3 endpoints -- confirm this is intended.
_S3_HTTPS_PATTERN = re.compile(r"(s3-|s3\.)?(.*)\.amazonaws\.com")
31+
32+
33+
class EarthDataResolver(DomainPatternResolver):
    """Resolver for NASA EarthData assets.

    Accepts a JWT token directly. The token's payload is decoded and its
    ``exp`` claim is checked before each request (the signature is not
    verified here); callers are responsible for providing a non-expired
    token.
    """

    needs_credentials = True

    # Alternation of the escaped hostnames listed in ``_EARTHDATA_DOMAINS``.
    domain_pattern = re.compile("|".join(re.escape(d) for d in _EARTHDATA_DOMAINS))

    def __init__(self, token: str) -> None:
        """Store the raw JWT; it is validated each time headers are requested."""
        self._raw_token = token

    @classmethod
    def from_config(cls, config: Any) -> EarthDataResolver | None:
        """Build a resolver from ``config.earthdata_token``.

        Returns ``None`` when the attribute is missing or empty.
        """
        token = getattr(config, "earthdata_token", "") or ""
        return cls(token=token) if token else None

    @staticmethod
    def _add_base64_padding(data: str) -> str:
        """Right-pad *data* with ``=`` to a multiple of four characters.

        JWT segments are transmitted without Base64 padding, which the
        decoder requires.
        """
        return data + "=" * (-len(data) % 4)

    @classmethod
    def _validate_jwt(cls, jwt_token: str) -> dict:
        """Decode the payload of *jwt_token* and check its expiry.

        Returns the decoded payload as a dict.

        Raises
        ------
        ValueError
            If the token does not have three dot-separated parts, the payload
            is not Base64/UTF-8/JSON, the payload is not a JSON object, the
            ``exp`` claim is missing or not numeric, or the token has expired.
        """
        parts = jwt_token.split(".")
        if len(parts) != 3:
            raise ValueError("Invalid EarthData JWT: expected three dot-separated parts.")

        try:
            decoded = base64.urlsafe_b64decode(cls._add_base64_padding(parts[1])).decode("utf-8")
        except ValueError as exc:
            # binascii.Error and UnicodeDecodeError are both ValueError subclasses.
            raise ValueError("Invalid EarthData JWT: payload is not valid Base64.") from exc

        try:
            payload = json.loads(decoded)
        except json.JSONDecodeError as exc:
            raise ValueError("Invalid EarthData JWT: payload is not valid JSON.") from exc

        # Valid JSON is not necessarily an object; a list or string payload
        # would otherwise fail with AttributeError on ``.get``.
        if not isinstance(payload, dict):
            raise ValueError("Invalid EarthData JWT: payload is not a JSON object.")

        exp = payload.get("exp")
        if exp is None:
            raise ValueError("Invalid EarthData JWT: missing 'exp' claim.")
        # ``bool`` is an ``int`` subclass, so exclude it explicitly.
        if isinstance(exp, bool) or not isinstance(exp, (int, float)):
            raise ValueError("Invalid EarthData JWT: 'exp' claim is not a numeric timestamp.")
        if time.time() > exp:
            raise ValueError(f"EarthData JWT expired on {time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(exp))}.")
        return payload

    def get_download_url(self, url: str) -> str:
        """Return *url* unchanged."""
        return url

    def get_headers(self, url: str) -> dict[str, str]:
        """Return the ``Authorization`` header carrying the stored token.

        Raises ``ValueError`` when the stored token is malformed or expired.
        """
        self._validate_jwt(self._raw_token)
        return {"Authorization": f"Bearer {self._raw_token}"}
87+
88+
class EUMETSATResolver(DomainPatternResolver):
    """Asset resolver for URLs on the EUMETSAT API host.

    A bearer token is obtained through an OAuth2 ``client_credentials``
    exchange with ``https://api.eumetsat.int/token``. The token is kept on
    the instance and treated as stale 60 seconds ahead of its reported
    expiry.
    """

    needs_credentials = True

    domain_pattern = re.compile(r"api\.eumetsat\.int$")

    _TOKEN_URL = "https://api.eumetsat.int/token"

    def __init__(self, client_id: str, client_secret: str) -> None:
        """Remember the client credentials; no token is requested yet."""
        self._client_id = client_id
        self._client_secret = client_secret
        self._bearer_token: str | None = None
        self._token_expires: datetime | None = None

    @classmethod
    def from_config(cls, config: Any) -> EUMETSATResolver | None:
        """Build a resolver from ``config`` when both credentials are set.

        Reads ``eumetsat_client_id`` and ``eumetsat_client_secret``; returns
        ``None`` if either is missing or empty.
        """
        credentials = {
            "client_id": getattr(config, "eumetsat_client_id", "") or "",
            "client_secret": getattr(config, "eumetsat_client_secret", "") or "",
        }
        if not all(credentials.values()):
            return None
        return cls(**credentials)

    def _refresh_token_if_needed(self) -> None:
        """Fetch a new bearer token unless the cached one is still valid."""
        expires_at = self._token_expires
        token_is_fresh = (
            bool(self._bearer_token)
            and expires_at is not None
            and expires_at > datetime.now(timezone.utc)
        )
        if token_is_fresh:
            return

        token_response = requests.post(
            self._TOKEN_URL,
            data={"grant_type": "client_credentials"},
            auth=HTTPBasicAuth(self._client_id, self._client_secret),
            timeout=15,
        )
        token_response.raise_for_status()
        token_body = token_response.json()

        self._bearer_token = token_body["access_token"]
        # Shave 60 seconds off the advertised lifetime as a safety buffer.
        lifetime = timedelta(seconds=int(token_body["expires_in"]) - 60)
        self._token_expires = datetime.now(timezone.utc) + lifetime

    def get_download_url(self, url: str) -> str:
        """Return *url* unchanged."""
        return url

    def get_headers(self, url: str) -> dict[str, str]:
        """Return the ``Authorization`` header, refreshing the token first if needed."""
        self._refresh_token_if_needed()
        return {"Authorization": f"Bearer {self._bearer_token}"}
137+
138+
139+
class S3Resolver(AssetResolver):
    """Resolver for S3 assets (``s3://`` and S3 HTTPS URLs).

    Uses a ``boto3`` S3 client for ``s3://`` URLs. For S3 HTTPS URLs
    :meth:`download` returns ``None`` so the caller can fall back to plain
    HTTP.

    When constructed without an explicit *s3_client*, a default client is
    lazily created from the environment's AWS credential chain on first
    download. This requires ``boto3`` to be installed
    (``pip install 'earthdaily[download]'``).

    Parameters
    ----------
    s3_client :
        A ``boto3`` S3 client (``boto3.client("s3")``). If ``None``, one is
        created lazily using default AWS credentials.
    requester_pays :
        Whether to send ``RequestPayer: requester`` for ``s3://`` downloads.
    """

    needs_credentials = False

    def __init__(
        self,
        s3_client: Any = None,
        requester_pays: bool = True,
    ) -> None:
        self._s3_client = s3_client
        self._requester_pays = requester_pays

    def _get_s3_client(self) -> Any:
        """Return the S3 client, lazily creating a default one if needed.

        Raises
        ------
        ImportError
            If no client was supplied and ``boto3`` is not installed.
        """
        if self._s3_client is None:
            try:
                import boto3
            except ImportError as exc:
                # Chain the original error so the failing import stays visible.
                raise ImportError(
                    "boto3 is required for S3 downloads. Install it with: pip install 'earthdaily[download]'"
                ) from exc
            self._s3_client = boto3.client("s3")
        return self._s3_client

    @staticmethod
    def _extract_s3_href(asset_dict: dict) -> str:
        """Prefer ``alternate.s3.href`` over the top-level ``href``.

        Missing or ``null`` ``alternate`` / ``s3`` entries are treated as
        absent rather than raising.
        """
        alternate = asset_dict.get("alternate") or {}
        s3_alternate = alternate.get("s3") or {}
        return s3_alternate.get("href") or asset_dict.get("href", "")

    @staticmethod
    def _safe_relative_path(candidate: str) -> Path:
        """Return *candidate* as a path that cannot escape the output directory.

        Raises
        ------
        ValueError
            If *candidate* is anchored (absolute or drive-qualified) or
            contains a ``..`` segment.
        """
        relative = Path(candidate)
        if relative.anchor or ".." in relative.parts:
            raise ValueError(f"Refusing to write outside the output directory: {candidate!r}")
        return relative

    def can_handle(self, url: str) -> bool:
        """Return ``True`` for ``s3://`` or matching HTTPS URLs that name an object.

        URLs whose path ends in ``/`` are rejected.
        """
        parsed = urlparse(url, allow_fragments=False)
        is_s3_uri = parsed.scheme == "s3"
        is_s3_https = parsed.scheme == "https" and _S3_HTTPS_PATTERN.match(parsed.netloc) is not None
        if not (is_s3_uri or is_s3_https):
            return False
        return not parsed.path.endswith("/")

    def get_download_url(self, url: str) -> str:
        """Return *url* unchanged."""
        return url

    def get_headers(self, url: str) -> dict[str, str]:
        """Return no extra headers."""
        return {}

    def download(
        self, url: str, output_dir: Path, quiet: bool = False, asset_metadata: dict | None = None
    ) -> Path | None:
        """Download an S3 asset directly via boto3.

        For ``s3://`` URLs this uses ``s3_client.download_file``. For any
        other scheme the method returns ``None`` so the caller can fall back
        to the standard HTTP download path.

        When *asset_metadata* contains a ``file:local_path`` field the file is
        saved at ``output_dir / <file:local_path>``; otherwise it is saved at
        ``output_dir / <S3 key>``, preserving the key's directory structure.

        Parameters
        ----------
        url :
            The asset URL.
        output_dir :
            Directory under which the file is written.
        quiet :
            When ``True`` no progress bar is shown (and no ``head_object``
            request is made to learn the object size).
        asset_metadata :
            Optional asset dictionary; only ``file:local_path`` is read.

        Returns
        -------
        The destination path, or ``None`` when *url* is not an ``s3://`` URL.

        Raises
        ------
        ValueError
            If the URL lacks a bucket or key, or the destination would fall
            outside *output_dir*.
        """
        parsed = urlparse(url, allow_fragments=False)

        if parsed.scheme != "s3":
            return None

        bucket = parsed.netloc
        key = parsed.path.lstrip("/")
        if not bucket or not key:
            raise ValueError(f"Invalid S3 URL (expected s3://<bucket>/<key>): {url}")

        # Both the key and ``file:local_path`` come from remote metadata, so
        # make sure neither can redirect the write outside ``output_dir``.
        local_path = (asset_metadata or {}).get("file:local_path")
        dest = Path(output_dir) / self._safe_relative_path(local_path or key)

        makedirs(dest.parent, exist_ok=True)

        extra_args: dict | None = {"RequestPayer": "requester"} if self._requester_pays else None

        s3 = self._get_s3_client()
        logger.info("Downloading %s to %s", url, dest)

        if quiet:
            s3.download_file(bucket, key, str(dest), ExtraArgs=extra_args)
        else:
            # The object size is needed up front to give the progress bar a total.
            head = s3.head_object(Bucket=bucket, Key=key, **(extra_args or {}))
            total = head["ContentLength"]
            with tqdm(total=total, unit="B", unit_scale=True, desc=dest.name) as pbar:
                s3.download_file(bucket, key, str(dest), ExtraArgs=extra_args, Callback=pbar.update)

        return dest

0 commit comments

Comments
 (0)