Use the If-Modified-Since header to optimize CRL updates
This commit is contained in:
parent
250b2d9942
commit
3d9987ac13
@ -1,10 +1,13 @@
|
|||||||
import requests
|
import requests
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
|
import pendulum
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
|
|
||||||
# DISA-published index page listing DoD CRL distribution points.
_DISA_CRLS = "https://iasecontent.disa.mil/pki-pke/data/crls/dod_crldps.htm"

# Seconds (15 minutes) used to pad a local CRL's mtime when building the
# If-Modified-Since header for conditional downloads.
MODIFIED_TIME_BUFFER = 15 * 60
||||||
def fetch_disa():
|
def fetch_disa():
|
||||||
response = requests.get(_DISA_CRLS)
|
response = requests.get(_DISA_CRLS)
|
||||||
@ -28,35 +31,66 @@ def crl_list_from_disa_html(html):
|
|||||||
parser.feed(html)
|
parser.feed(html)
|
||||||
return parser.crl_list
|
return parser.crl_list
|
||||||
|
|
||||||
|
|
||||||
def crl_local_path(out_dir, crl_location):
    """Map a CRL's URL to its local file path under ``out_dir``.

    The last ``/``-separated segment of ``crl_location`` becomes the
    file name.

    :param out_dir: directory the CRL file lives in
    :param crl_location: URL (or any slash-separated path) of the CRL
    :return: joined local path string
    """
    # str.split is the idiomatic choice over re.split for a fixed
    # single-character separator; behavior is identical.
    name = crl_location.split("/")[-1]
    return os.path.join(out_dir, name)
|
|
||||||
def write_crl(out_dir, crl_location):
|
|
||||||
|
def existing_crl_modification_time(crl):
    """Return an HTTP-date string for ``crl``'s mtime, or False if absent.

    :param crl: local path of a previously downloaded CRL
    :return: a "ddd, DD MMM YYYY HH:mm:ss zz" GMT string suitable for an
        If-Modified-Since header, or False when the file does not exist
    """
    if not os.path.exists(crl):
        return False

    last_sync = os.path.getmtime(crl)
    padded = last_sync + MODIFIED_TIME_BUFFER
    # NOTE(review): once MODIFIED_TIME_BUFFER seconds have elapsed, the
    # *padded* (later) time is reported — presumably to widen the window
    # in which the server answers 304; confirm intent.
    stamp = last_sync if pendulum.now().timestamp() < padded else padded
    return pendulum.from_timestamp(stamp, tz="GMT").format(
        "ddd, DD MMM YYYY HH:mm:ss zz"
    )
||||||
|
|
||||||
|
def write_crl(out_dir, target_dir, crl_location):
    """Download one CRL into ``out_dir``, skipping it when unmodified.

    A conditional GET is issued using the modification time of the copy
    already present in ``target_dir``; a 304 response writes nothing.

    :param out_dir: directory freshly downloaded CRLs are written to
    :param target_dir: directory holding the previously synced CRLs
    :param crl_location: URL of the CRL to fetch
    :return: True when a fresh copy was written, False on 304 Not Modified
    """
    destination = crl_local_path(out_dir, crl_location)
    previous_copy = crl_local_path(target_dir, crl_location)

    request_kwargs = {"stream": True}
    since = existing_crl_modification_time(previous_copy)
    if since:
        request_kwargs["headers"] = {"If-Modified-Since": since}

    with requests.get(crl_location, **request_kwargs) as response:
        if response.status_code == 304:
            return False
        # Stream the body to disk in 1 KiB chunks; empty keep-alive
        # chunks are skipped.
        with open(destination, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    out_file.write(chunk)

    return True
|
|
||||||
|
|
||||||
def remove_bad_crl(out_dir, crl_location):
    """Delete the local copy of a CRL whose download failed mid-stream."""
    os.remove(crl_local_path(out_dir, crl_location))
|
|
||||||
def refresh_crls(out_dir, target_dir, logger):
    """Sync every CRL listed on the DISA page.

    Each CRL is conditionally downloaded into ``out_dir``; the copies in
    ``target_dir`` supply the If-Modified-Since timestamps. A download
    that dies mid-stream is removed and the loop continues.

    :param out_dir: directory new downloads are written to
    :param target_dir: directory holding the previously synced CRLs
    :param logger: logger instance (required by this signature)
    """
    disa_html = fetch_disa()
    crl_list = crl_list_from_disa_html(disa_html)
    for crl_location in crl_list:
        logger.info("updating CRL from {}".format(crl_location))
        try:
            if write_crl(out_dir, target_dir, crl_location):
                logger.info("successfully synced CRL from {}".format(crl_location))
            else:
                logger.info("no updates for CRL from {}".format(crl_location))
        except requests.exceptions.ChunkedEncodingError:
            # logger is already used unconditionally above, so the old
            # `if logger:` guard here was dead code — removed for
            # consistency with the required-logger signature.
            logger.error(
                "Error downloading {}, removing file and continuing anyway".format(
                    crl_location
                )
            )
            remove_bad_crl(out_dir, crl_location)
|
|
||||||
@ -71,7 +105,7 @@ if __name__ == "__main__":
|
|||||||
logger = logging.getLogger()
|
logger = logging.getLogger()
|
||||||
logger.info("Updating CRLs")
|
logger.info("Updating CRLs")
|
||||||
try:
|
try:
|
||||||
refresh_crls(sys.argv[1], logger=logger)
|
refresh_crls(sys.argv[1], sys.argv[2], logger)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.exception("Fatal error encountered, stopping")
|
logger.exception("Fatal error encountered, stopping")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
@ -5,7 +5,7 @@ set -e
|
|||||||
cd "$(dirname "$0")/.."

# Download into a scratch directory first so a failed sync never
# clobbers the CRLs currently served from ./crl; ./crl is also passed
# as the target dir so unchanged CRLs answer 304 and are skipped.
mkdir -p crl-tmp
pipenv run python ./atst/domain/authnid/crl/util.py crl-tmp crl
mkdir -p crl
# --min-size 400 skips suspiciously small files — presumably truncated
# or error-page downloads; verify the threshold against real CRL sizes.
rsync -rq --min-size 400 crl-tmp/. crl/.
rm -rf crl-tmp
||||||
|
@ -66,8 +66,9 @@ def test_parse_disa_pki_list():
|
|||||||
assert len(crl_list) == len(href_matches)
|
assert len(crl_list) == len(href_matches)
|
||||||
|
|
||||||
class MockStreamingResponse():
|
class MockStreamingResponse():
|
||||||
def __init__(self, content_chunks, code=200):
    """Record the chunks iter_content will yield and the mocked status.

    ``code`` defaults to 200 so pre-existing tests keep working unchanged.
    """
    self.status_code = code
    self.content_chunks = content_chunks
||||||
def iter_content(self, chunk_size=0):
    """Return the canned chunks; chunk_size is accepted only for
    requests API compatibility and is ignored."""
    return self.content_chunks
|
||||||
@ -81,6 +82,10 @@ class MockStreamingResponse():
|
|||||||
def test_write_crl(tmpdir, monkeypatch):
    """A 200 streaming response is written chunk-by-chunk into out_dir."""
    monkeypatch.setattr('requests.get', lambda u, **kwargs: MockStreamingResponse([b'it worked']))
    name = 'crl_1'
    assert util.write_crl(tmpdir, "random_target_dir", name)
    written = tmpdir.listdir()
    assert [p.basename for p in written] == [name]
    assert [p.read() for p in written] == ['it worked']
|
|
||||||
|
def test_skips_crl_if_it_has_not_been_modified(tmpdir, monkeypatch):
    """A 304 response makes write_crl report no update and write nothing."""
    monkeypatch.setattr(
        'requests.get',
        lambda u, **kwargs: MockStreamingResponse([b'it worked'], 304),
    )
    assert not util.write_crl(tmpdir, "random_target_dir", 'crl_file_name')
|
Loading…
x
Reference in New Issue
Block a user