Skip to content

Commit 4068d53

Browse files
committed
fix: bounded retry with exponential backoff for 403 throttle responses
The execute_rest() function previously retried throttled (403) responses every 10 seconds with no maximum retry count. This caused Commander to hang indefinitely when throttled, and the 10-second retry interval prevented the server's cooldown timer from expiring. Changes: - Add max retry count (3 attempts) before raising KeeperApiError - Parse the server's "try again in X minutes/seconds" message - Use exponential backoff (30s, 60s, 120s) capped at server's suggestion - Cap server wait time at 300s to prevent excessive delays - Log throttle attempts as warnings instead of debug - After max retries, raise KeeperApiError so callers can handle it The --fail-on-throttle flag continues to work as before (immediate error). Unit tests (9 cases): - Normal request unaffected by throttle logic - Throttle twice then succeed (backoff 30s, 60s) - KeeperApiError raised after 3 retries - --fail-on-throttle skips retries entirely - Parses "try again in X seconds" correctly - Parses "try again in X minutes" correctly - Caps server wait at 300s - Exponential backoff progression (30s, 60s, 120s) - Missing message defaults to 60s
1 parent c73360a commit 4068d53

2 files changed

Lines changed: 216 additions & 2 deletions

File tree

keepercommander/rest_api.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
# Contact: ops@keepersecurity.com
1010
#
1111

12+
import re
1213
import requests
1314
import os
1415
import json
@@ -142,6 +143,8 @@ def execute_rest(context, endpoint, payload, timeout=None):
142143
context.server_key_id = 7
143144

144145
run_request = True
146+
throttle_retries = 0
147+
max_throttle_retries = 3
145148
while run_request:
146149
run_request = False
147150

@@ -251,8 +254,25 @@ def execute_rest(context, endpoint, payload, timeout=None):
251254
continue
252255
elif rs.status_code == 403:
253256
if failure.get('error') == 'throttled' and not context.fail_on_throttle:
254-
logging.debug('Throttled, retrying in 10 seconds')
255-
time.sleep(10)
257+
throttle_retries += 1
258+
if throttle_retries > max_throttle_retries:
259+
raise KeeperApiError(failure.get('error'), failure.get('message'))
260+
# Parse server's suggested wait time, default to 60s
261+
wait_seconds = 60
262+
message = failure.get('message', '')
263+
wait_match = re.search(r'(\d+)\s*(second|minute)', message, re.IGNORECASE)
264+
if wait_match:
265+
wait_val = int(wait_match.group(1))
266+
if 'minute' in wait_match.group(2).lower():
267+
wait_seconds = wait_val * 60
268+
else:
269+
wait_seconds = wait_val
270+
# Cap server suggestion at 5 minutes, then take the larger of suggestion vs backoff
271+
wait_seconds = min(wait_seconds, 300)
272+
backoff = max(wait_seconds, 30 * (2 ** (throttle_retries - 1)))
273+
logging.warning('Throttled (attempt %d/%d), retrying in %d seconds',
274+
throttle_retries, max_throttle_retries, backoff)
275+
time.sleep(backoff)
256276
run_request = True
257277
continue
258278
elif rs.status_code in (400, 500) and context.qrc_key_id is not None:

unit-tests/test_throttle_retry.py

Lines changed: 194 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,194 @@
1+
"""Tests for execute_rest() throttle retry logic in rest_api.py.
2+
3+
Verifies:
4+
- Normal (non-throttled) requests are unaffected
5+
- Throttled requests retry up to 3 times with exponential backoff
6+
- KeeperApiError raised after max retries
7+
- --fail-on-throttle skips retries entirely
8+
- Server's "try again in X" message is parsed correctly (seconds + minutes)
9+
- Server wait capped at 300s
10+
- Backoff takes the larger of server hint vs exponential schedule
11+
"""
12+
13+
import json
14+
import os
15+
import sys
16+
import unittest
17+
from unittest.mock import patch, MagicMock
18+
19+
# Add parent dir so imports work from unit-tests/
20+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
21+
22+
from keepercommander.rest_api import execute_rest
23+
from keepercommander.error import KeeperApiError
24+
from keepercommander.params import RestApiContext
25+
from keepercommander.proto import APIRequest_pb2 as proto
26+
27+
28+
def make_context(fail_on_throttle=False):
29+
"""Create a real RestApiContext with QRC disabled to simplify mocking."""
30+
ctx = RestApiContext(server='https://keepersecurity.com', locale='en_US')
31+
ctx.transmission_key = os.urandom(32)
32+
ctx.server_key_id = 7
33+
ctx.fail_on_throttle = fail_on_throttle
34+
ctx.disable_qrc() # Skip QRC key negotiation
35+
return ctx
36+
37+
38+
def make_throttle_response(message="Please try again in 1 minute"):
39+
"""Build a fake HTTP 403 throttle response."""
40+
body = {"error": "throttled", "message": message}
41+
resp = MagicMock()
42+
resp.status_code = 403
43+
resp.headers = {'Content-Type': 'application/json'}
44+
resp.json.return_value = body
45+
resp.content = json.dumps(body).encode()
46+
return resp
47+
48+
49+
def make_success_response():
50+
"""Build a fake HTTP 200 response with empty body."""
51+
resp = MagicMock()
52+
resp.status_code = 200
53+
resp.headers = {'Content-Type': 'application/octet-stream'}
54+
resp.content = b''
55+
return resp
56+
57+
58+
def make_payload():
59+
"""Create a minimal ApiRequestPayload for execute_rest."""
60+
return proto.ApiRequestPayload()
61+
62+
63+
class TestThrottleRetry(unittest.TestCase):
64+
"""Tests for the bounded retry with exponential backoff on 403 throttle."""
65+
66+
@patch('keepercommander.rest_api.time.sleep')
67+
@patch('keepercommander.rest_api.requests.post')
68+
def test_normal_request_unaffected(self, mock_post, mock_sleep):
69+
"""Non-throttled 200 response should pass through with no retries."""
70+
mock_post.return_value = make_success_response()
71+
72+
execute_rest(make_context(), 'test/endpoint', make_payload())
73+
74+
self.assertEqual(mock_post.call_count, 1)
75+
mock_sleep.assert_not_called()
76+
77+
@patch('keepercommander.rest_api.time.sleep')
78+
@patch('keepercommander.rest_api.requests.post')
79+
def test_retries_then_succeeds(self, mock_post, mock_sleep):
80+
"""Throttle twice, succeed on 3rd attempt."""
81+
mock_post.side_effect = [
82+
make_throttle_response("try again in 30 seconds"),
83+
make_throttle_response("try again in 30 seconds"),
84+
make_success_response(),
85+
]
86+
87+
execute_rest(make_context(), 'test/endpoint', make_payload())
88+
89+
self.assertEqual(mock_post.call_count, 3)
90+
self.assertEqual(mock_sleep.call_count, 2)
91+
# 1st: max(30, 30*2^0=30) = 30
92+
# 2nd: max(30, 30*2^1=60) = 60
93+
calls = [c.args[0] for c in mock_sleep.call_args_list]
94+
self.assertEqual(calls, [30, 60])
95+
96+
@patch('keepercommander.rest_api.time.sleep')
97+
@patch('keepercommander.rest_api.requests.post')
98+
def test_raises_after_max_retries(self, mock_post, mock_sleep):
99+
"""Always throttled — should raise KeeperApiError after 3 retries."""
100+
mock_post.return_value = make_throttle_response("try again in 1 minute")
101+
102+
with self.assertRaises(KeeperApiError):
103+
execute_rest(make_context(), 'test/endpoint', make_payload())
104+
105+
# 1 initial + 3 retries = 4 posts, error raised when retry 4 > max 3
106+
self.assertEqual(mock_post.call_count, 4)
107+
self.assertEqual(mock_sleep.call_count, 3)
108+
109+
@patch('keepercommander.rest_api.time.sleep')
110+
@patch('keepercommander.rest_api.requests.post')
111+
def test_fail_on_throttle_skips_retry(self, mock_post, mock_sleep):
112+
"""--fail-on-throttle should return error immediately with no retries."""
113+
mock_post.return_value = make_throttle_response()
114+
115+
result = execute_rest(make_context(fail_on_throttle=True), 'test/endpoint', make_payload())
116+
117+
# When fail_on_throttle=True, the throttle block is skipped and the
118+
# failure dict is returned directly (no retry, no exception)
119+
self.assertEqual(result.get('error'), 'throttled')
120+
self.assertEqual(mock_post.call_count, 1)
121+
mock_sleep.assert_not_called()
122+
123+
@patch('keepercommander.rest_api.time.sleep')
124+
@patch('keepercommander.rest_api.requests.post')
125+
def test_parses_seconds_hint(self, mock_post, mock_sleep):
126+
"""Server says 'try again in 45 seconds' — wait should be 45s."""
127+
mock_post.side_effect = [
128+
make_throttle_response("try again in 45 seconds"),
129+
make_success_response(),
130+
]
131+
132+
execute_rest(make_context(), 'test/endpoint', make_payload())
133+
134+
# max(45, 30*2^0=30) = 45
135+
mock_sleep.assert_called_once_with(45)
136+
137+
@patch('keepercommander.rest_api.time.sleep')
138+
@patch('keepercommander.rest_api.requests.post')
139+
def test_parses_minutes_hint(self, mock_post, mock_sleep):
140+
"""Server says 'try again in 2 minutes' — wait should be 120s."""
141+
mock_post.side_effect = [
142+
make_throttle_response("try again in 2 minutes"),
143+
make_success_response(),
144+
]
145+
146+
execute_rest(make_context(), 'test/endpoint', make_payload())
147+
148+
# max(120, 30*2^0=30) = 120
149+
mock_sleep.assert_called_once_with(120)
150+
151+
@patch('keepercommander.rest_api.time.sleep')
152+
@patch('keepercommander.rest_api.requests.post')
153+
def test_caps_server_wait_at_300s(self, mock_post, mock_sleep):
154+
"""Server says 'try again in 49 minutes' — capped to 300s."""
155+
mock_post.side_effect = [
156+
make_throttle_response("try again in 49 minutes"),
157+
make_success_response(),
158+
]
159+
160+
execute_rest(make_context(), 'test/endpoint', make_payload())
161+
162+
# min(2940, 300)=300; max(300, 30*2^0=30) = 300
163+
mock_sleep.assert_called_once_with(300)
164+
165+
@patch('keepercommander.rest_api.time.sleep')
166+
@patch('keepercommander.rest_api.requests.post')
167+
def test_exponential_backoff_progression(self, mock_post, mock_sleep):
168+
"""Verify backoff doubles: 30s, 60s, 120s when server hint is small."""
169+
mock_post.return_value = make_throttle_response("try again in 10 seconds")
170+
171+
with self.assertRaises(KeeperApiError):
172+
execute_rest(make_context(), 'test/endpoint', make_payload())
173+
174+
# Server says 10s, but backoff wins: max(10, 30*2^0)=30, max(10, 30*2^1)=60, max(10, 30*2^2)=120
175+
calls = [c.args[0] for c in mock_sleep.call_args_list]
176+
self.assertEqual(calls, [30, 60, 120])
177+
178+
@patch('keepercommander.rest_api.time.sleep')
179+
@patch('keepercommander.rest_api.requests.post')
180+
def test_no_message_defaults_to_60s(self, mock_post, mock_sleep):
181+
"""Missing 'try again' text defaults to 60s server hint."""
182+
mock_post.side_effect = [
183+
make_throttle_response("Rate limit exceeded"), # no "try again in X"
184+
make_success_response(),
185+
]
186+
187+
execute_rest(make_context(), 'test/endpoint', make_payload())
188+
189+
# Default 60s; max(60, 30*2^0=30) = 60
190+
mock_sleep.assert_called_once_with(60)
191+
192+
193+
if __name__ == '__main__':
194+
unittest.main()

0 commit comments

Comments
 (0)