Merge pull request #1074 from dod-ccpo/lock-environments
Implement simple locking system for environments
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy import text, func, or_
|
||||
from sqlalchemy.orm.exc import NoResultFound
|
||||
from sqlalchemy.orm import load_only
|
||||
from typing import List
|
||||
from uuid import UUID
|
||||
|
||||
from atst.database import db
|
||||
from atst.models import Environment, Application, Portfolio, TaskOrder, CLIN
|
||||
@@ -97,44 +97,53 @@ class Environments(object):
|
||||
@classmethod
|
||||
def base_provision_query(cls, now):
|
||||
return (
|
||||
db.session.query(Environment)
|
||||
db.session.query(Environment.id)
|
||||
.join(Application)
|
||||
.join(Portfolio)
|
||||
.join(TaskOrder)
|
||||
.join(CLIN)
|
||||
.filter(CLIN.start_date <= now)
|
||||
.filter(CLIN.end_date > now)
|
||||
# select only these columns
|
||||
.options(load_only("id", "creator_id"))
|
||||
.filter(
|
||||
or_(
|
||||
Environment.claimed_until == None,
|
||||
Environment.claimed_until <= func.now(),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def get_environments_pending_creation(cls, now) -> List[Environment]:
|
||||
def get_environments_pending_creation(cls, now) -> List[UUID]:
|
||||
"""
|
||||
Any environment with an active CLIN that doesn't yet have a `cloud_id`.
|
||||
"""
|
||||
return cls.base_provision_query(now).filter(Environment.cloud_id == None).all()
|
||||
results = (
|
||||
cls.base_provision_query(now).filter(Environment.cloud_id == None).all()
|
||||
)
|
||||
return [id_ for id_, in results]
|
||||
|
||||
@classmethod
|
||||
def get_environments_pending_atat_user_creation(cls, now) -> List[Environment]:
|
||||
def get_environments_pending_atat_user_creation(cls, now) -> List[UUID]:
|
||||
"""
|
||||
Any environment with an active CLIN that has a cloud_id but no `root_user_info`.
|
||||
"""
|
||||
return (
|
||||
results = (
|
||||
cls.base_provision_query(now)
|
||||
.filter(Environment.cloud_id != None)
|
||||
.filter(Environment.root_user_info == text("'null'"))
|
||||
).all()
|
||||
return [id_ for id_, in results]
|
||||
|
||||
@classmethod
|
||||
def get_environments_pending_baseline_creation(cls, now) -> List[Environment]:
|
||||
def get_environments_pending_baseline_creation(cls, now) -> List[UUID]:
|
||||
"""
|
||||
Any environment with an active CLIN that has a `cloud_id` and `root_user_info`
|
||||
but no `baseline_info`.
|
||||
"""
|
||||
return (
|
||||
results = (
|
||||
cls.base_provision_query(now)
|
||||
.filter(Environment.cloud_id != None)
|
||||
.filter(Environment.root_user_info != text("'null'"))
|
||||
.filter(Environment.baseline_info == text("'null'"))
|
||||
).all()
|
||||
return [id_ for id_, in results]
|
||||
|
@@ -44,3 +44,12 @@ class NoAccessError(Exception):
|
||||
@property
|
||||
def message(self):
|
||||
return "Route for {} cannot be accessed".format(self.resource_name)
|
||||
|
||||
|
||||
class ClaimFailedException(Exception):
    """Raised when an exclusive claim on a resource cannot be acquired.

    Attributes:
        resource: The object whose claim was refused; it must expose an `id`.
        message: The human-readable description, also passed to `Exception`.
    """

    def __init__(self, resource):
        self.resource = resource
        # Keep the text on the instance too, so handlers that read `.message`
        # (as NoAccessError exposes) can treat this exception the same way.
        self.message = (
            f"Could not acquire claim for {resource.__class__.__name__} {resource.id}."
        )
        super().__init__(self.message)
|
||||
|
106
atst/jobs.py
106
atst/jobs.py
@@ -6,7 +6,7 @@ from atst.queue import celery
|
||||
from atst.models import EnvironmentJobFailure, EnvironmentRoleJobFailure
|
||||
from atst.domain.csp.cloud import CloudProviderInterface, GeneralCSPException
|
||||
from atst.domain.environments import Environments
|
||||
from atst.domain.users import Users
|
||||
from atst.models.utils import claim_for_update
|
||||
|
||||
|
||||
class RecordEnvironmentFailure(celery.Task):
|
||||
@@ -44,58 +44,61 @@ def send_notification_mail(recipients, subject, body):
|
||||
app.mailer.send(recipients, subject, body)
|
||||
|
||||
|
||||
def do_create_environment(
|
||||
csp: CloudProviderInterface, environment_id=None, atat_user_id=None
|
||||
):
|
||||
def do_create_environment(csp: CloudProviderInterface, environment_id=None):
|
||||
environment = Environments.get(environment_id)
|
||||
|
||||
if environment.cloud_id is not None:
|
||||
# TODO: Return value for this?
|
||||
return
|
||||
with claim_for_update(environment) as environment:
|
||||
|
||||
user = Users.get(atat_user_id)
|
||||
if environment.cloud_id is not None:
|
||||
# TODO: Return value for this?
|
||||
return
|
||||
|
||||
# we'll need to do some checking in this job for cases where it's retrying
|
||||
# when a failure occurred after some successful steps
|
||||
# (e.g. if environment.cloud_id is not None, then we can skip first step)
|
||||
user = environment.creator
|
||||
|
||||
# credentials either from a given user or pulled from config?
|
||||
# if using global creds, do we need to log what user authorized action?
|
||||
atat_root_creds = csp.root_creds()
|
||||
# we'll need to do some checking in this job for cases where it's retrying
|
||||
# when a failure occurred after some successful steps
|
||||
# (e.g. if environment.cloud_id is not None, then we can skip first step)
|
||||
|
||||
# user is needed because baseline root account in the environment will
|
||||
# be assigned to the requesting user, open question how to handle duplicate
|
||||
# email addresses across new environments
|
||||
csp_environment_id = csp.create_environment(atat_root_creds, user, environment)
|
||||
environment.cloud_id = csp_environment_id
|
||||
db.session.add(environment)
|
||||
db.session.commit()
|
||||
# credentials either from a given user or pulled from config?
|
||||
# if using global creds, do we need to log what user authorized action?
|
||||
atat_root_creds = csp.root_creds()
|
||||
|
||||
# user is needed because baseline root account in the environment will
|
||||
# be assigned to the requesting user, open question how to handle duplicate
|
||||
# email addresses across new environments
|
||||
csp_environment_id = csp.create_environment(atat_root_creds, user, environment)
|
||||
environment.cloud_id = csp_environment_id
|
||||
db.session.add(environment)
|
||||
db.session.commit()
|
||||
|
||||
|
||||
def do_create_atat_admin_user(csp: CloudProviderInterface, environment_id=None):
|
||||
environment = Environments.get(environment_id)
|
||||
atat_root_creds = csp.root_creds()
|
||||
|
||||
atat_remote_root_user = csp.create_atat_admin_user(
|
||||
atat_root_creds, environment.cloud_id
|
||||
)
|
||||
environment.root_user_info = atat_remote_root_user
|
||||
db.session.add(environment)
|
||||
db.session.commit()
|
||||
with claim_for_update(environment) as environment:
|
||||
atat_root_creds = csp.root_creds()
|
||||
|
||||
atat_remote_root_user = csp.create_atat_admin_user(
|
||||
atat_root_creds, environment.cloud_id
|
||||
)
|
||||
environment.root_user_info = atat_remote_root_user
|
||||
db.session.add(environment)
|
||||
db.session.commit()
|
||||
|
||||
|
||||
def do_create_environment_baseline(csp: CloudProviderInterface, environment_id=None):
|
||||
environment = Environments.get(environment_id)
|
||||
|
||||
# ASAP switch to use remote root user for provisioning
|
||||
atat_remote_root_creds = environment.root_user_info["credentials"]
|
||||
with claim_for_update(environment) as environment:
|
||||
# ASAP switch to use remote root user for provisioning
|
||||
atat_remote_root_creds = environment.root_user_info["credentials"]
|
||||
|
||||
baseline_info = csp.create_environment_baseline(
|
||||
atat_remote_root_creds, environment.cloud_id
|
||||
)
|
||||
environment.baseline_info = baseline_info
|
||||
db.session.add(environment)
|
||||
db.session.commit()
|
||||
baseline_info = csp.create_environment_baseline(
|
||||
atat_remote_root_creds, environment.cloud_id
|
||||
)
|
||||
environment.baseline_info = baseline_info
|
||||
db.session.add(environment)
|
||||
db.session.commit()
|
||||
|
||||
|
||||
def do_work(fn, task, csp, **kwargs):
|
||||
@@ -106,39 +109,46 @@ def do_work(fn, task, csp, **kwargs):
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def create_environment(self, environment_id=None, atat_user_id=None):
|
||||
do_work(do_create_environment, self, app.csp.cloud, **kwargs)
|
||||
def create_environment(self, environment_id=None):
|
||||
do_work(do_create_environment, self, app.csp.cloud, environment_id=environment_id)
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def create_atat_admin_user(self, environment_id=None):
|
||||
do_work(do_create_atat_admin_user, self, app.csp.cloud, **kwargs)
|
||||
do_work(
|
||||
do_create_atat_admin_user, self, app.csp.cloud, environment_id=environment_id
|
||||
)
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def create_environment_baseline(self, environment_id=None):
|
||||
do_work(do_create_environment_baseline, self, app.csp.cloud, **kwargs)
|
||||
do_work(
|
||||
do_create_environment_baseline,
|
||||
self,
|
||||
app.csp.cloud,
|
||||
environment_id=environment_id,
|
||||
)
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def dispatch_create_environment(self):
|
||||
for environment in Environments.get_environments_pending_creation(pendulum.now()):
|
||||
create_environment.delay(
|
||||
environment_id=environment.id, atat_user_id=environment.creator_id
|
||||
)
|
||||
for environment_id in Environments.get_environments_pending_creation(
|
||||
pendulum.now()
|
||||
):
|
||||
create_environment.delay(environment_id=environment_id)
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def dispatch_create_atat_admin_user(self):
|
||||
for environment in Environments.get_environments_pending_atat_user_creation(
|
||||
for environment_id in Environments.get_environments_pending_atat_user_creation(
|
||||
pendulum.now()
|
||||
):
|
||||
create_atat_admin_user.delay(environment_id=environment.id)
|
||||
create_atat_admin_user.delay(environment_id=environment_id)
|
||||
|
||||
|
||||
@celery.task(bind=True)
|
||||
def dispatch_create_environment_baseline(self):
|
||||
for environment in Environments.get_environments_pending_baseline_creation(
|
||||
for environment_id in Environments.get_environments_pending_baseline_creation(
|
||||
pendulum.now()
|
||||
):
|
||||
create_environment_baseline.delay(environment_id=environment.id)
|
||||
create_environment_baseline.delay(environment_id=environment_id)
|
||||
|
@@ -1,4 +1,4 @@
|
||||
from sqlalchemy import Column, ForeignKey, String
|
||||
from sqlalchemy import Column, ForeignKey, String, TIMESTAMP
|
||||
from sqlalchemy.orm import relationship
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from enum import Enum
|
||||
@@ -29,6 +29,8 @@ class Environment(
|
||||
root_user_info = Column(JSONB)
|
||||
baseline_info = Column(JSONB)
|
||||
|
||||
claimed_until = Column(TIMESTAMP(timezone=True))
|
||||
|
||||
job_failures = relationship("EnvironmentJobFailure")
|
||||
|
||||
class ProvisioningStatus(Enum):
|
||||
|
49
atst/models/utils.py
Normal file
49
atst/models/utils.py
Normal file
@@ -0,0 +1,49 @@
|
||||
from sqlalchemy import func, sql, Interval, and_, or_
|
||||
from contextlib import contextmanager
|
||||
|
||||
from atst.database import db
|
||||
from atst.domain.exceptions import ClaimFailedException
|
||||
|
||||
|
||||
@contextmanager
def claim_for_update(resource, minutes=30):
    """
    Claim a mutually exclusive expiring hold on a resource.
    Uses the database as a central source of time in case the server clocks have drifted.

    Args:
        resource: A SQLAlchemy model instance with a `claimed_until` attribute.
        minutes: The maximum amount of time, in minutes, to hold the claim.

    Yields:
        The row for `resource`, re-queried after the claim has been applied.

    Raises:
        ClaimFailedException: If the claiming UPDATE matched no row, i.e. the
            resource is missing or still holds an unexpired claim.
    """
    Model = resource.__class__

    # The expiry is computed by the database itself:
    # now() + CAST(concat(<minutes>, ' MINUTES') AS INTERVAL).
    claim_until = func.now() + func.cast(
        sql.functions.concat(minutes, " MINUTES"), Interval
    )

    # Optimistically query for and update the resource in question. If it's
    # already claimed, `rows_updated` will be 0 and we can give up.
    rows_updated = (
        db.session.query(Model)
        .filter(
            and_(
                Model.id == resource.id,
                or_(
                    Model.claimed_until.is_(None),
                    Model.claimed_until <= func.now(),
                ),
            )
        )
        .update({"claimed_until": claim_until}, synchronize_session="fetch")
    )
    if rows_updated < 1:
        raise ClaimFailedException(resource)

    # Fetch the claimed resource
    claimed = db.session.query(Model).filter_by(id=resource.id).one()

    try:
        # Give the resource to the caller.
        yield claimed
    finally:
        # Release the claim.
        db.session.query(Model).filter(Model.id == resource.id).filter(
            Model.claimed_until.isnot(None)
        ).update({"claimed_until": None}, synchronize_session="fetch")
        # Persist the release. Callers commit inside the `with` body, so
        # without this the UPDATE above would sit in a new, uncommitted
        # transaction and the claim would only lapse when it expired.
        db.session.commit()
|
Reference in New Issue
Block a user