From 0b5acde4c40e778096ad7d378ceffb1a4e552e71 Mon Sep 17 00:00:00 2001 From: dandds Date: Thu, 31 Oct 2019 08:44:44 -0400 Subject: [PATCH] Stream-parse CRLs for caching file locations. AT-AT needs to maintain a key-value CRL cache where each key is the DER byte-string of the issuer and the value is a dictionary of the CRL file path and expiration. This way when it checks a client certificate, it can load the correct CRL by comparing the issuers. This is preferable to loading all of the CRLs in-memory. However, it still requires that AT-AT load and parse each CRL when the application boots. Because of the size of the CRLs and their parsed, in-memory size, this leads to the application spiking to use nearly 900MB of memory (resting usage is around 50MB). This change introduces a small function to ad-hoc parse the CRL and obtain the information in the CRL we need: the issuer and the expiration. It does this by reading the CRL byte-by-byte until it reaches the ASN1 sequence that corresponds to the issuer, and then looks ahead to find the nextUpdate field (i.e., the expiration date). The CRLCache class uses this function to build its cache and JSON-serializes the cache to disk. If another AT-AT application process finds the serialized version, it will load that copy instead of rebuilding it. It also entails a change to the function signature for the init method of CRLCache: now it expects the CRL directory as its second argument, instead of a list of locations. The Python script invoked by `script/sync-crls` will rebuild the location cache each time it's run. This means that when the Kubernetes CronJob for CRLs runs, it will refresh the cache each time. When a new application container boots, it will get the refreshed cache. This also adds a nightly CircleCI job to sync the CRLs and test that the ad-hoc parsing function returns the same result as a proper parsing using the Python cryptography library. This provides extra insurance that the function is returning correct results on real data. --- .circleci/config.yml | 36 +++++++++ .gitignore | 1 + atst/app.py | 8 +- atst/domain/authnid/crl/__init__.py | 26 +++--- atst/domain/authnid/crl/util.py | 120 +++++++++++++++++++++++++++- tests/check_crl_parse.py | 17 ++++ tests/domain/authnid/test_crl.py | 105 +++++++++++++++++------- tests/fixtures/crl/.keep | 0 tests/test_auth.py | 7 +- tests/utils.py | 8 ++ 10 files changed, 277 insertions(+), 51 deletions(-) create mode 100644 tests/check_crl_parse.py create mode 100644 tests/fixtures/crl/.keep diff --git a/.circleci/config.yml b/.circleci/config.yml index da7d9d58..4aefe461 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -138,6 +138,28 @@ jobs: --network atat \ ghostinspector/test-runner-standalone:latest + test-crl-parser: + docker: + - image: docker:18.06.0-ce-git + - image: circleci/postgres:10-alpine-ram + - image: circleci/redis:4-alpine3.8 + steps: + - setup_remote_docker: + docker_layer_caching: true + version: 18.06.0-ce + - restore_docker_image + - setup_datastores: + pgdatabase: atat_test + - run: + name: Sync CRLs and run CRL test + command: | + docker run \ + -e PGHOST=postgres \ + -e REDIS_URI=redis://redis:6379 \ + --network atat \ + atat:builder \ + /bin/sh -c "pipenv install --dev && /bin/sh script/sync-crls && pipenv run pytest --no-cov tests/check_crl_parse.py" + deploy: docker: - image: docker:18.06.0-ce-git @@ -226,3 +248,17 @@ workflows: branches: only: - master + + test-crl-parser: + triggers: + - schedule: + cron: "0 4 * * *" + filters: + branches: + only: + - master + jobs: + - docker-build + - test-crl-parser: + requires: + - docker-build diff --git a/.gitignore b/.gitignore index 6d69a803..19f4acc5 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ config/dev.ini /crls /crl-tmp *.bk +crl_locations.json # test CA config ssl/client-certs/*.srl diff --git a/atst/app.py b/atst/app.py index 4be64f58..e58c43ec 100644 --- a/atst/app.py +++ b/atst/app.py @@ -1,6 +1,5 @@ import os import re -import pathlib from configparser import ConfigParser from datetime import datetime from flask import Flask, request, g, session @@ -247,11 +246,10 @@ def make_crl_validator(app): if app.config.get("DISABLE_CRL_CHECK"): app.crl_cache = NoOpCRLCache(logger=app.logger) else: - crl_locations = [] - for filename in pathlib.Path(app.config["CRL_STORAGE_CONTAINER"]).glob("*.crl"): - crl_locations.append(filename.absolute()) app.crl_cache = CRLCache( - app.config["CA_CHAIN"], crl_locations, logger=app.logger + app.config["CA_CHAIN"], + app.config["CRL_STORAGE_CONTAINER"], + logger=app.logger, ) diff --git a/atst/domain/authnid/crl/__init__.py b/atst/domain/authnid/crl/__init__.py index 7865ef14..6ac18aba 100644 --- a/atst/domain/authnid/crl/__init__.py +++ b/atst/domain/authnid/crl/__init__.py @@ -1,11 +1,13 @@ -import sys import os import re import hashlib import logging -from flask import current_app as app -from datetime import datetime + from OpenSSL import crypto, SSL +from datetime import datetime +from flask import current_app as app + +from .util import load_crl_locations_cache, serialize_crl_locations_cache # error codes from OpenSSL: https://github.com/openssl/openssl/blob/2c75f03b39de2fa7d006bc0f0d7c58235a54d9bb/include/openssl/x509_vfy.h#L111 CRL_EXPIRED_ERROR_CODE = 12 @@ -70,12 +72,12 @@ class CRLCache(CRLInterface): def __init__( self, root_location, - crl_locations=[], + crl_dir, store_class=crypto.X509Store, logger=None, crl_update_func=None, ): - self._crl_locations = crl_locations + self._crl_dir = crl_dir self.logger = logger self.store_class = store_class self.certificate_authorities = {} @@ -96,16 +98,10 @@ class CRLCache(CRLInterface): return [match.group(0) for match in self._PEM_RE.finditer(root_str)] def _build_crl_cache(self): - self.crl_cache = {} - for crl_location in self._crl_locations: - crl = self._load_crl(crl_location) - if crl: - issuer_der = crl.get_issuer().der() - expires = crl.to_cryptography().next_update - self.crl_cache[issuer_der] = { - "location": crl_location, - "expires": expires, - } + try: + self.crl_cache = load_crl_locations_cache(self._crl_dir) + except FileNotFoundError: + self.crl_cache = serialize_crl_locations_cache(self._crl_dir) def _load_crl(self, crl_location): with open(crl_location, "rb") as crl_file: diff --git a/atst/domain/authnid/crl/util.py b/atst/domain/authnid/crl/util.py index e9bc74fb..18e070c1 100644 --- a/atst/domain/authnid/crl/util.py +++ b/atst/domain/authnid/crl/util.py @@ -1,5 +1,7 @@ -import re +from datetime import datetime +import json import os +import re import pendulum import requests @@ -9,6 +11,10 @@ class CRLNotFoundError(Exception): pass +class CRLParseError(Exception): + pass + + MODIFIED_TIME_BUFFER = 15 * 60 @@ -70,6 +76,113 @@ CRL_LIST = [ ] +def scan_for_issuer_and_next_update(crl): + """ + Scans a CRL file byte-by-byte to find the issuer and nextUpdate fields. + + Per RFC 5280, the issuer is the fourth ASN1 sequence element to occur in a + DER-encoded CRL file (https://tools.ietf.org/html/rfc5280#section-5.1). + This function takes a brute-force approach and scans the file until if find + the fourth sequence, then begins collecting that and the following bytes to + construct the issuer. It stop doing this when it finds \x17, which begins + the thisUpdate field. It then scans for the next UTCTime element (the next + occurrence of \x17) and grabs the 13 following bytes. It parses the ASN1 + UTCTime byte string to derive a datetime object. + + :param crl: + The path to a CRL file on-disk. + + :return: + A two-element tuple. The first element is the raw DER bytes of the + issuer, the second is the parsed Python datetime object for nextUpdate. + """ + with open(crl, "rb") as f: + byte = f.read(1) + sequences = 0 + issuer_finished = False + issuer = b"" + while byte: + if not issuer_finished: + if byte == b"0" and sequences < 4: + sequences += 1 + + if byte == b"\x17" and sequences == 4: + issuer_finished = True + + if sequences == 4 and not issuer_finished: + issuer += byte + else: + if byte == b"\x17": + byte_str = f.read(13) + next_update = datetime.strptime( + byte_str[1:].decode(), "%y%m%d%H%M%S" + ) + return (issuer, next_update) + + byte = f.read(1) + + raise CRLParseError("CRL could not be scanned.") + + +def build_crl_locations_cache(crl_locations, logger=None): + crl_cache = {} + for crl_location in crl_locations: + try: + issuer_der, next_update = scan_for_issuer_and_next_update(crl_location) + crl_cache[issuer_der] = {"location": crl_location, "expires": next_update} + except CRLParseError: + if logger: + logger.warning( + "CRL could not be scanned for caching: {}".format(crl_location) + ) + continue + + return crl_cache + + +JSON_CACHE = "crl_locations.json" + + +def _serialize_cache_items(cache): + return { + der.hex(): { + k: v.timestamp() if hasattr(v, "timestamp") else v + for (k, v) in data.items() + } + for (der, data) in cache.items() + } + + +def _deserialize_cache_items(cache): + return { + bytes.fromhex(der): { + k: datetime.fromtimestamp(v) if isinstance(v, float) else v + for (k, v) in data.items() + } + for (der, data) in cache.items() + } + + +def serialize_crl_locations_cache(crl_dir, logger=None): + crl_locations = [ + "{}/{}".format(crl_dir, crl_path) for crl_path in os.listdir(crl_dir) + ] + location_cache = build_crl_locations_cache(crl_locations, logger=logger) + json_location = "{}/{}".format(crl_dir, JSON_CACHE) + with open(json_location, "w") as json_file: + json_ready = _serialize_cache_items(location_cache) + json.dump(json_ready, json_file) + + return location_cache + + +def load_crl_locations_cache(crl_dir): + json_location = "{}/{}".format(crl_dir, JSON_CACHE) + with open(json_location, "r") as json_file: + cache = json.load(json_file) + return _deserialize_cache_items(cache) + + def crl_local_path(out_dir, crl_location): name = re.split("/", crl_location)[-1] crl = os.path.join(out_dir, name) @@ -150,7 +263,10 @@ if __name__ == "__main__": logger = logging.getLogger() logger.info("Updating CRLs") try: - refresh_crls(sys.argv[1], sys.argv[2], logger) + tmp_location = sys.argv[1] + final_location = sys.argv[2] + refresh_crls(tmp_location, final_location, logger) + serialize_crl_locations_cache(tmp_location, logger=logger) except Exception as err: logger.exception("Fatal error encountered, stopping") sys.exit(1) diff --git a/tests/check_crl_parse.py b/tests/check_crl_parse.py new file mode 100644 index 00000000..ebf6bd03 --- /dev/null +++ b/tests/check_crl_parse.py @@ -0,0 +1,17 @@ +import os +import pytest + +from atst.domain.authnid.crl.util import scan_for_issuer_and_next_update + +from tests.utils import parse_for_issuer_and_next_update + + +CRL_DIR = "crls" +_CRLS = ["{}/{}".format(CRL_DIR, file_) for file_ in os.listdir(CRL_DIR)] + + +@pytest.mark.parametrize("crl_path", _CRLS) +def test_crl_scan_against_parse(crl_path): + parsed_der = parse_for_issuer_and_next_update(crl_path) + scanned_der = scan_for_issuer_and_next_update(crl_path) + assert parsed_der == scanned_der diff --git a/tests/domain/authnid/test_crl.py b/tests/domain/authnid/test_crl.py index 25a686cf..0460fc30 100644 --- a/tests/domain/authnid/test_crl.py +++ b/tests/domain/authnid/test_crl.py @@ -5,6 +5,7 @@ import os import shutil from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives.serialization import Encoding +from OpenSSL import crypto from atst.domain.authnid.crl import ( CRLCache, @@ -12,9 +13,17 @@ from atst.domain.authnid.crl import ( CRLInvalidException, NoOpCRLCache, ) +from atst.domain.authnid.crl.util import ( + scan_for_issuer_and_next_update, + build_crl_locations_cache, + serialize_crl_locations_cache, + load_crl_locations_cache, + CRLParseError, + JSON_CACHE, +) from tests.mocks import FIXTURE_EMAIL_ADDRESS, DOD_CN -from tests.utils import FakeLogger +from tests.utils import FakeLogger, parse_for_issuer_and_next_update class MockX509Store: @@ -34,7 +43,9 @@ class MockX509Store: def test_can_build_crl_list(crl_file, ca_key, ca_file, make_crl, tmpdir): crl = make_crl(ca_key) - cache = CRLCache(ca_file, crl_locations=[crl_file], store_class=MockX509Store) + dir_ = os.path.dirname(crl_file) + serialize_crl_locations_cache(dir_) + cache = CRLCache(ca_file, dir_, store_class=MockX509Store) issuer_der = crl.issuer.public_bytes(default_backend()) assert len(cache.crl_cache.keys()) == 1 assert issuer_der in cache.crl_cache @@ -42,26 +53,16 @@ def test_can_build_crl_list(crl_file, ca_key, ca_file, make_crl, tmpdir): assert cache.crl_cache[issuer_der]["expires"] == crl.next_update -def test_can_build_trusted_root_list(): +def test_can_build_trusted_root_list(app): location = "ssl/server-certs/ca-chain.pem" cache = CRLCache( - root_location=location, crl_locations=[], store_class=MockX509Store + location, app.config["CRL_STORAGE_CONTAINER"], store_class=MockX509Store ) with open(location) as f: content = f.read() assert len(cache.certificate_authorities.keys()) == content.count("BEGIN CERT") -def test_can_build_crl_list_with_missing_crls(): - location = "ssl/client-certs/client-ca.der.crl" - cache = CRLCache( - "ssl/client-certs/client-ca.crt", - crl_locations=["tests/fixtures/sample.pdf"], - store_class=MockX509Store, - ) - assert len(cache.crl_cache.keys()) == 0 - - def test_crl_validation_on_login( app, client, @@ -78,8 +79,9 @@ def test_crl_validation_on_login( crl = make_crl(ca_key, expired_serials=[bad_cert.serial_number]) serialize_pki_object_to_disk(crl, crl_file, encoding=Encoding.DER) + crl_dir = os.path.dirname(crl_file) - cache = CRLCache(ca_file, crl_locations=[crl_file]) + cache = CRLCache(ca_file, crl_dir) assert cache.crl_check(good_cert.public_bytes(Encoding.PEM).decode()) with pytest.raises(CRLRevocationException): cache.crl_check(bad_cert.public_bytes(Encoding.PEM).decode()) @@ -94,7 +96,8 @@ def test_can_dynamically_update_crls( make_crl, serialize_pki_object_to_disk, ): - cache = CRLCache(ca_file, crl_locations=[crl_file]) + crl_dir = os.path.dirname(crl_file) + cache = CRLCache(ca_file, crl_dir) client_cert = make_x509(rsa_key(), signer_key=ca_key, cn="chewbacca") client_pem = client_cert.public_bytes(Encoding.PEM) assert cache.crl_check(client_pem) @@ -107,8 +110,10 @@ def test_can_dynamically_update_crls( assert cache.crl_check(client_pem) -def test_throws_error_for_missing_issuer(): - cache = CRLCache("ssl/server-certs/ca-chain.pem", crl_locations=[]) +def test_throws_error_for_missing_issuer(app): + cache = CRLCache( + "ssl/server-certs/ca-chain.pem", app.config["CRL_STORAGE_CONTAINER"] + ) # this cert is self-signed, and so the application does not have a # corresponding CRL for it cert = open("tests/fixtures/{}.crt".format(FIXTURE_EMAIL_ADDRESS), "rb").read() @@ -123,10 +128,7 @@ def test_throws_error_for_missing_issuer(): def test_multistep_certificate_chain(): - cache = CRLCache( - "tests/fixtures/chain/ca-chain.pem", - crl_locations=["tests/fixtures/chain/intermediate.crl"], - ) + cache = CRLCache("tests/fixtures/chain/ca-chain.pem", "tests/fixtures/chain/") cert = open("tests/fixtures/chain/client.crt", "rb").read() assert cache.crl_check(cert) @@ -144,7 +146,8 @@ def test_expired_crl_raises_CRLInvalidException_with_failover_config_false( ): client_cert = make_x509(rsa_key(), signer_key=ca_key, cn="chewbacca") client_pem = client_cert.public_bytes(Encoding.PEM) - crl_cache = CRLCache(ca_file, crl_locations=[expired_crl_file]) + crl_dir = os.path.dirname(expired_crl_file) + crl_cache = CRLCache(ca_file, crl_dir) with pytest.raises(CRLInvalidException): crl_cache.crl_check(client_pem) @@ -154,7 +157,8 @@ def test_expired_crl_passes_with_failover_config_true( ): client_cert = make_x509(rsa_key(), signer_key=ca_key, cn="chewbacca") client_pem = client_cert.public_bytes(Encoding.PEM) - crl_cache = CRLCache(ca_file, crl_locations=[expired_crl_file]) + crl_dir = os.path.dirname(expired_crl_file) + crl_cache = CRLCache(ca_file, crl_dir) assert crl_cache.crl_check(client_pem) @@ -173,7 +177,54 @@ def test_updates_expired_certs( def _crl_update_func(): shutil.copyfile(crl_file, expired_crl_file) - crl_cache = CRLCache( - ca_file, crl_locations=[expired_crl_file], crl_update_func=_crl_update_func - ) + crl_dir = os.path.dirname(expired_crl_file) + crl_cache = CRLCache(ca_file, crl_dir, crl_update_func=_crl_update_func) crl_cache.crl_check(client_pem) + + +def test_scan_for_issuer_and_next_update(crl_file): + parsed = parse_for_issuer_and_next_update(crl_file) + scanned = scan_for_issuer_and_next_update(crl_file) + assert parsed == scanned + + +@pytest.fixture +def bad_crl(tmpdir): + bad_file = tmpdir.join("bad.crl") + with open(bad_file, "wb") as bad: + bad.write(b"definitely not a crl") + + return bad_file + + +def test_scan_for_issuer_and_next_update_with_bad_data(bad_crl): + with pytest.raises(CRLParseError): + scan_for_issuer_and_next_update(bad_crl) + + +def test_build_crl_locations_cache(crl_file): + issuer_der, next_update = parse_for_issuer_and_next_update(crl_file) + cache = build_crl_locations_cache([crl_file]) + assert cache == {issuer_der: {"location": crl_file, "expires": next_update}} + + +def test_build_crl_locations_cache_with_bad_data(crl_file, bad_crl): + logger = FakeLogger() + issuer_der, next_update = parse_for_issuer_and_next_update(crl_file) + cache = build_crl_locations_cache([crl_file, bad_crl], logger=logger) + assert cache == {issuer_der: {"location": crl_file, "expires": next_update}} + assert logger.messages + assert str(bad_crl) in logger.messages[0] + + +def test_serialize_crl_locations_cache(crl_file, bad_crl): + dir_ = os.path.dirname(crl_file) + serialize_crl_locations_cache(dir_) + assert os.path.isfile("{}/{}".format(dir_, JSON_CACHE)) + + +def test_load_crl_locations_cache(crl_file, bad_crl): + dir_ = os.path.dirname(crl_file) + serialize_crl_locations_cache(dir_) + cache = load_crl_locations_cache(dir_) + assert isinstance(cache, dict) diff --git a/tests/fixtures/crl/.keep b/tests/fixtures/crl/.keep new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_auth.py b/tests/test_auth.py index 903cb23d..de1c03f5 100644 --- a/tests/test_auth.py +++ b/tests/test_auth.py @@ -1,3 +1,4 @@ +import os from urllib.parse import urlparse import pytest @@ -147,7 +148,8 @@ def swap_crl_cache( else: crl = make_crl(ca_key) serialize_pki_object_to_disk(crl, crl_file, encoding=Encoding.DER) - app.crl_cache = CRLCache(ca_file, crl_locations=[crl_file]) + crl_dir = os.path.dirname(crl_file) + app.crl_cache = CRLCache(ca_file, crl_dir) yield _swap_crl_cache @@ -172,7 +174,8 @@ def test_crl_validation_on_login( crl = make_crl(ca_key, expired_serials=[bad_cert.serial_number]) serialize_pki_object_to_disk(crl, crl_file, encoding=Encoding.DER) - cache = CRLCache(ca_file, crl_locations=[crl_file]) + crl_dir = os.path.dirname(crl_file) + cache = CRLCache(ca_file, crl_dir) swap_crl_cache(cache) # bad cert is on the test CRL diff --git a/tests/utils.py b/tests/utils.py index 913a80e7..1a36b819 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,6 +1,7 @@ from flask import template_rendered from contextlib import contextmanager from unittest.mock import Mock +from OpenSSL import crypto from atst.utils.notification_sender import NotificationSender @@ -43,3 +44,10 @@ class FakeLogger: FakeNotificationSender = lambda: Mock(spec=NotificationSender) + + +def parse_for_issuer_and_next_update(crl): + with open(crl, "rb") as crl_file: + parsed = crypto.load_crl(crypto.FILETYPE_ASN1, crl_file.read()) + next_update = parsed.to_cryptography().next_update + return (parsed.get_issuer().der(), next_update)