Merge pull request #163 from dod-ccpo/crl-sync-optimization

CRL sync optimization
dandds 2018-08-13 15:01:38 -04:00 committed by GitHub
commit 50ac87576a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 59 additions and 13 deletions

atst/domain/authnid/crl/util.py

@@ -1,10 +1,13 @@
 import requests
 import re
 import os
+import pendulum
 from html.parser import HTMLParser
 
 _DISA_CRLS = "https://iasecontent.disa.mil/pki-pke/data/crls/dod_crldps.htm"
 
+MODIFIED_TIME_BUFFER = 15 * 60
+
 
 def fetch_disa():
     response = requests.get(_DISA_CRLS)
@@ -29,29 +32,67 @@ def crl_list_from_disa_html(html):
     return parser.crl_list
 
 
-def write_crl(out_dir, crl_location):
+def crl_local_path(out_dir, crl_location):
     name = re.split("/", crl_location)[-1]
     crl = os.path.join(out_dir, name)
-    with requests.get(crl_location, stream=True) as r:
+    return crl
+
+
+def existing_crl_modification_time(crl):
+    if os.path.exists(crl):
+        prev_time = os.path.getmtime(crl)
+        buffered = prev_time + MODIFIED_TIME_BUFFER
+        mod_time = prev_time if pendulum.now().timestamp() < buffered else buffered
+        dt = pendulum.from_timestamp(mod_time, tz="GMT")
+        return dt.format("ddd, DD MMM YYYY HH:mm:ss zz")
+    else:
+        return False
+
+
+def write_crl(out_dir, target_dir, crl_location):
+    crl = crl_local_path(out_dir, crl_location)
+    existing = crl_local_path(target_dir, crl_location)
+    options = {"stream": True}
+    mod_time = existing_crl_modification_time(existing)
+    if mod_time:
+        options["headers"] = {"If-Modified-Since": mod_time}
+
+    with requests.get(crl_location, **options) as response:
+        if response.status_code == 304:
+            return False
+
         with open(crl, "wb") as crl_file:
-            for chunk in r.iter_content(chunk_size=1024):
+            for chunk in response.iter_content(chunk_size=1024):
                 if chunk:
                     crl_file.write(chunk)
+
+    return True
 
 
-def refresh_crls(out_dir, logger=None):
+def remove_bad_crl(out_dir, crl_location):
+    crl = crl_local_path(out_dir, crl_location)
+    os.remove(crl)
+
+
+def refresh_crls(out_dir, target_dir, logger):
     disa_html = fetch_disa()
     crl_list = crl_list_from_disa_html(disa_html)
     for crl_location in crl_list:
-        if logger:
-            logger.info("updating CRL from {}".format(crl_location))
+        logger.info("updating CRL from {}".format(crl_location))
         try:
-            write_crl(out_dir, crl_location)
+            if write_crl(out_dir, target_dir, crl_location):
+                logger.info("successfully synced CRL from {}".format(crl_location))
+            else:
+                logger.info("no updates for CRL from {}".format(crl_location))
        except requests.exceptions.ChunkedEncodingError:
             if logger:
                 logger.error(
-                    "Error downloading {}, continuing anyway".format(crl_location)
+                    "Error downloading {}, removing file and continuing anyway".format(
+                        crl_location
+                    )
                 )
+            remove_bad_crl(out_dir, crl_location)
 
 
 if __name__ == "__main__":
@@ -64,7 +105,7 @@ if __name__ == "__main__":
     logger = logging.getLogger()
     logger.info("Updating CRLs")
     try:
-        refresh_crls(sys.argv[1], logger=logger)
+        refresh_crls(sys.argv[1], sys.argv[2], logger)
     except Exception as err:
         logger.exception("Fatal error encountered, stopping")
         sys.exit(1)
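
The core of the optimization is the conditional GET in write_crl: the modification time of the copy already in target_dir is formatted as an HTTP date and sent as an If-Modified-Since header, and a 304 Not Modified response skips the download entirely. MODIFIED_TIME_BUFFER pads that timestamp by 15 minutes once the local copy is older than the buffer, which makes a 304 more likely if DISA republishes an unchanged file shortly after a sync. Below is a minimal standalone sketch of the same request pattern; it is hypothetical (placeholder URL and path, and the standard library's email.utils.formatdate standing in for pendulum), not the commit's code:

# Sketch of the conditional-GET pattern write_crl applies above.
import os
from email.utils import formatdate

import requests

def download_if_modified(url, local_path):
    headers = {}
    if os.path.exists(local_path):
        # format the local mtime as an RFC 7231 HTTP date,
        # e.g. "Mon, 13 Aug 2018 19:01:38 GMT"
        headers["If-Modified-Since"] = formatdate(
            os.path.getmtime(local_path), usegmt=True
        )
    with requests.get(url, headers=headers, stream=True) as response:
        if response.status_code == 304:
            return False  # server copy is no newer; keep the local file
        response.raise_for_status()
        with open(local_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
    return True

The new second positional argument also changes the CLI contract to python ./atst/domain/authnid/crl/util.py <out_dir> <target_dir>, matching the sync script change below.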

CRL sync shell script

@@ -5,9 +5,9 @@ set -e
 cd "$(dirname "$0")/.."
 
 mkdir -p crl-tmp
-pipenv run python ./atst/domain/authnid/crl/util.py crl-tmp
+pipenv run python ./atst/domain/authnid/crl/util.py crl-tmp crl
 mkdir -p crl
-rsync -rq crl-tmp/. crl/.
+rsync -rq --min-size 400 crl-tmp/. crl/.
 rm -rf crl-tmp
 
 if [[ $FLASK_ENV != "prod" ]]; then
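
Note the division of labor: crl-tmp is the out_dir where fresh downloads land, while the existing crl directory is passed as target_dir so the If-Modified-Since check runs against the copies already being served. The rsync --min-size 400 flag then declines to copy anything under 400 bytes into crl, so a truncated or error-page download can no longer clobber a known-good CRL; the threshold is presumably chosen to be smaller than any real DER-encoded CRL.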

CRL util tests

@@ -66,8 +66,9 @@ def test_parse_disa_pki_list():
     assert len(crl_list) == len(href_matches)
 
 class MockStreamingResponse():
-    def __init__(self, content_chunks):
+    def __init__(self, content_chunks, code=200):
         self.content_chunks = content_chunks
+        self.status_code = code
 
     def iter_content(self, chunk_size=0):
         return self.content_chunks
@@ -81,6 +82,10 @@ class MockStreamingResponse():
 def test_write_crl(tmpdir, monkeypatch):
     monkeypatch.setattr('requests.get', lambda u, **kwargs: MockStreamingResponse([b'it worked']))
     crl = 'crl_1'
-    util.write_crl(tmpdir, crl)
+    assert util.write_crl(tmpdir, "random_target_dir", crl)
     assert [p.basename for p in tmpdir.listdir()] == [crl]
     assert [p.read() for p in tmpdir.listdir()] == ['it worked']
+
+def test_skips_crl_if_it_has_not_been_modified(tmpdir, monkeypatch):
+    monkeypatch.setattr('requests.get', lambda u, **kwargs: MockStreamingResponse([b'it worked'], 304))
+    assert not util.write_crl(tmpdir, "random_target_dir", 'crl_file_name')
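
A companion test could exercise both branches of the new existing_crl_modification_time helper directly. This sketch is hypothetical (not part of this commit) but uses only fixtures the file already relies on:

def test_existing_crl_modification_time(tmpdir):
    crl = tmpdir.join('test.crl')
    crl.write('fake crl data')
    # an existing file yields an HTTP-date string to send as If-Modified-Since
    assert util.existing_crl_modification_time(str(crl))
    # a missing file yields False, so write_crl sends no conditional header
    assert not util.existing_crl_modification_time(str(tmpdir.join('missing.crl')))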