Conditional CRL sync: only download a CRL when it changed upstream.

Summary of the change
  * write_crl() takes a new target_dir argument. If a copy of the CRL already
    exists there, its modification time is sent as an If-Modified-Since header
    (advanced by MODIFIED_TIME_BUFFER seconds once that much time has passed).
    It returns False on HTTP 304 and True after writing the file.
  * refresh_crls() now requires target_dir and logger as positional arguments,
    and logs whether each CRL was synced or was already up to date.
  * script/sync-crls passes the final "crl" directory as the second argument.
  * The tests cover the new return value and the 304 path.

Review notes on the patched code
  * refresh_crls() still guards logger.error() with "if logger:" although
    logger is now mandatory and is already used unguarded above it.
  * write_crl() writes the response body for every status other than 304, so
    an error response would be stored as if it were a CRL.

diff --git a/atst/domain/authnid/crl/util.py b/atst/domain/authnid/crl/util.py
index 88ab4116..a26835c5 100644
--- a/atst/domain/authnid/crl/util.py
+++ b/atst/domain/authnid/crl/util.py
@@ -1,10 +1,13 @@
 import requests
 import re
 import os
+import pendulum
 from html.parser import HTMLParser
 
 _DISA_CRLS = "https://iasecontent.disa.mil/pki-pke/data/crls/dod_crldps.htm"
 
+MODIFIED_TIME_BUFFER = 15 * 60
+
 
 def fetch_disa():
     response = requests.get(_DISA_CRLS)
@@ -28,35 +31,66 @@ def crl_list_from_disa_html(html):
     parser.feed(html)
     return parser.crl_list
 
+
 def crl_local_path(out_dir, crl_location):
     name = re.split("/", crl_location)[-1]
     crl = os.path.join(out_dir, name)
     return crl
 
-def write_crl(out_dir, crl_location):
+
+def existing_crl_modification_time(crl):
+    if os.path.exists(crl):
+        prev_time = os.path.getmtime(crl)
+        buffered = prev_time + MODIFIED_TIME_BUFFER
+        mod_time = prev_time if pendulum.now().timestamp() < buffered else buffered
+        dt = pendulum.from_timestamp(mod_time, tz="GMT")
+        return dt.format("ddd, DD MMM YYYY HH:mm:ss zz")
+
+    else:
+        return False
+
+
+def write_crl(out_dir, target_dir, crl_location):
     crl = crl_local_path(out_dir, crl_location)
-    with requests.get(crl_location, stream=True) as r:
+    existing = crl_local_path(target_dir, crl_location)
+    options = {"stream": True}
+    mod_time = existing_crl_modification_time(existing)
+    if mod_time:
+        options["headers"] = {"If-Modified-Since": mod_time}
+
+    with requests.get(crl_location, **options) as response:
+        if response.status_code == 304:
+            return False
+
         with open(crl, "wb") as crl_file:
-            for chunk in r.iter_content(chunk_size=1024):
+            for chunk in response.iter_content(chunk_size=1024):
                 if chunk:
                     crl_file.write(chunk)
 
+    return True
+
+
 def remove_bad_crl(out_dir, crl_location):
     crl = crl_local_path(out_dir, crl_location)
     os.remove(crl)
 
-def refresh_crls(out_dir, logger=None):
+
+def refresh_crls(out_dir, target_dir, logger):
     disa_html = fetch_disa()
     crl_list = crl_list_from_disa_html(disa_html)
     for crl_location in crl_list:
-        if logger:
-            logger.info("updating CRL from {}".format(crl_location))
+        logger.info("updating CRL from {}".format(crl_location))
         try:
-            write_crl(out_dir, crl_location)
+            if write_crl(out_dir, target_dir, crl_location):
+                logger.info("successfully synced CRL from {}".format(crl_location))
+            else:
+                logger.info("no updates for CRL from {}".format(crl_location))
         except requests.exceptions.ChunkedEncodingError:
             if logger:
                 logger.error(
-                    "Error downloading {}, continuing anyway".format(crl_location)
+                    "Error downloading {}, removing file and continuing anyway".format(
+                        crl_location
+                    )
                 )
             remove_bad_crl(out_dir, crl_location)
 
@@ -71,7 +105,7 @@ if __name__ == "__main__":
     logger = logging.getLogger()
     logger.info("Updating CRLs")
     try:
-        refresh_crls(sys.argv[1], logger=logger)
+        refresh_crls(sys.argv[1], sys.argv[2], logger)
     except Exception as err:
         logger.exception("Fatal error encountered, stopping")
         sys.exit(1)
diff --git a/script/sync-crls b/script/sync-crls
index 615ee57a..a8a3ff97 100755
--- a/script/sync-crls
+++ b/script/sync-crls
@@ -5,7 +5,7 @@ set -e
 cd "$(dirname "$0")/.."
 
 mkdir -p crl-tmp
-pipenv run python ./atst/domain/authnid/crl/util.py crl-tmp
+pipenv run python ./atst/domain/authnid/crl/util.py crl-tmp crl
 mkdir -p crl
 rsync -rq --min-size 400 crl-tmp/. crl/.
 rm -rf crl-tmp
diff --git a/tests/domain/authnid/test_crl.py b/tests/domain/authnid/test_crl.py
index 5593a865..1b9fa2ec 100644
--- a/tests/domain/authnid/test_crl.py
+++ b/tests/domain/authnid/test_crl.py
@@ -66,8 +66,9 @@ def test_parse_disa_pki_list():
     assert len(crl_list) == len(href_matches)
 
 class MockStreamingResponse():
-    def __init__(self, content_chunks):
+    def __init__(self, content_chunks, code=200):
         self.content_chunks = content_chunks
+        self.status_code = code
 
     def iter_content(self, chunk_size=0):
         return self.content_chunks
@@ -81,6 +82,10 @@ class MockStreamingResponse():
 def test_write_crl(tmpdir, monkeypatch):
     monkeypatch.setattr('requests.get', lambda u, **kwargs: MockStreamingResponse([b'it worked']))
     crl = 'crl_1'
-    util.write_crl(tmpdir, crl)
+    assert util.write_crl(tmpdir, "random_target_dir", crl)
     assert [p.basename for p in tmpdir.listdir()] == [crl]
     assert [p.read() for p in tmpdir.listdir()] == ['it worked']
+
+def test_skips_crl_if_it_has_not_been_modified(tmpdir, monkeypatch):
+    monkeypatch.setattr('requests.get', lambda u, **kwargs: MockStreamingResponse([b'it worked'], 304))
+    assert not util.write_crl(tmpdir, "random_target_dir", 'crl_file_name')