import collections
import itertools
import logging
import math
import random
import string

import stix2generator.generation
import stix2generator.generation.constraints
import stix2generator.generation.semantics
from stix2generator.exceptions import (
    CyclicSpecificationReferenceError, InvalidPropertyGroupError,
    ObjectGenerationError, PresenceCoconstraintError,
    SemanticValueTypeMismatchError, SpecificationNotFoundError,
    TypeMismatchError, UndefinedPropertyError, UnrecognizedJSONTypeError,
    ValueCoconstraintError
)
# Module logger; lazy-initialized on first use via _get_logger().  Read it
# through that function rather than directly.
_log = None

# Convenience alias: type(None) has no builtin name of its own.
_NONE_TYPE = type(None)

# Legal values for the "type" property in specifications
# This and the values of _JSON_TYPE_MAP should of course be
# kept in sync.
_JSON_TYPES = {
    "string",
    "number",
    "integer",
    "boolean",
    "object",
    "array",
    "null"
}

# Used to infer a JSON type from a python type (keys are exact type
# objects, so bool does not fall through to int).
_JSON_TYPE_MAP = {
    int: "integer",
    float: "number",
    str: "string",
    bool: "boolean",
    _NONE_TYPE: "null",
    dict: "object",
    list: "array"
}
class Config(stix2generator.generation.Config):
    """
    Simple config class for the ObjectGenerator.

    Most settings are self-explanatory; some notes:

    - The number settings apply to both "number" and "integer" specs.
    - optional_property_probability is the probability that an optional
      property is included.  Must be a number in [0, 1].
    - When minimize_ref_properties is True, the generator tries to
      minimize reference properties in generated objects while still
      satisfying specification constraints.  A reference property needed
      to satisfy a constraint is included regardless of this setting.
      This setting overrides optional_property_probability for reference
      properties: even with optional_property_probability of 1 (include
      every optional property), optional reference properties may still
      be omitted when minimizing.
    """
    _DEFAULTS = dict(
        string_length_min=5,
        string_length_max=20,
        string_chars=string.ascii_letters,
        number_min=-1000.0,
        is_number_min_exclusive=False,
        number_max=1000.0,
        is_number_max_exclusive=False,
        array_length_min=1,
        array_length_max=5,
        optional_property_probability=0.5,
        minimize_ref_properties=True
    )
[docs]class ObjectGenerator:
"""
Instances of this class generate JSON data from a specification. More
specifically, JSON-serializable Python structures are generated, which can
be converted to JSON text. So JSON objects are generated as dicts; arrays
as lists, etc.
"""
def __init__(
    self, spec_registry=None, semantic_providers=None, config=None
):
    """
    Initialize the generator.

    :param spec_registry: A name->specification mapping used to look
        up references inside of specifications
    :param semantic_providers: A list of semantic providers (e.g.
        instances of subclasses of SemanticsProvider)
    :param config: A Config instance giving user settings regarding
        generation.  If None, defaults will be used.
    """
    self.__specs = spec_registry if spec_registry else {}
    self.__config = config if config else Config()
    # Flatten the provider list into a semantic-name -> provider lookup
    # table, so generation needn't scan providers at each use.  A later
    # provider claiming the same semantic name wins, as in the original
    # insertion order.
    self.__semantics = {
        semantic: provider
        for provider in (semantic_providers or [])
        for semantic in provider.get_semantics()
    }
@property
def config(self):
    """
    Get this generator's config object.

    :return: The Config instance this generator was constructed with.
    """
    # If a public property, semantics implementations can use the config to
    # create values compatible with the invoking generator's settings.
    # Might be helpful?
    return self.__config
@property
def spec_names(self):
    """
    Get a read-only iterable view of this generator's supported spec
    names.

    :return: An iterable of spec names (a dict keys view, which tracks
        the underlying registry)
    """
    return self.__specs.keys()
def generate(
    self, spec_name, expected_type=None, spec_name_stack=None,
    value_constraint=None
):
    """
    Generate a value based on a specification identified by name.  The name
    is looked up in this generator's registry.

    :param spec_name: The specification name
    :param expected_type: If the named spec should produce a particular
        JSON type, that type.  If it doesn't matter, pass None.  This
        can be used to identify reference errors, where the reference is
        to a specification for the wrong type of thing.
    :param spec_name_stack: A stack of previously-visited specification
        names, used for reference loop detection.  Pass None to start a
        new stack.
    :param value_constraint: A ValueConstraint instance representing some
        additional constraint to be honored by the generator.  This is
        derived from a value co-constraint expression.  If None, there is
        no additional constraint.
    :return: The generated value
    :raises stix2generator.exceptions.CyclicSpecificationReferenceError:
        If a circular reference chain is detected
    :raises stix2generator.exceptions.SpecificationNotFoundError: If the
        named specification is not found in the registry
    :raises stix2generator.exceptions.ObjectGenerationError: For many other
        types of generation errors
    """
    if spec_name_stack is None:
        spec_name_stack = []

    # Reference loop check
    if spec_name in spec_name_stack:
        cycle = spec_name_stack[spec_name_stack.index(spec_name):]
        cycle.append(spec_name)
        raise CyclicSpecificationReferenceError(
            cycle, spec_name_stack=spec_name_stack[:]
        )

    if spec_name not in self.__specs:
        raise SpecificationNotFoundError(
            spec_name, spec_name_stack=spec_name_stack
        )

    spec = self.__specs[spec_name]

    # Push the name before recursing and pop in a finally block:
    # previously the pop was skipped when generate_from_spec raised,
    # leaving a stale entry on a caller-owned stack, which could cause
    # spurious cycle errors if the caller handled the exception and
    # continued generating with the same stack.
    spec_name_stack.append(spec_name)
    try:
        sdo_dict = self.generate_from_spec(
            spec, expected_type=expected_type,
            spec_name_stack=spec_name_stack,
            value_constraint=value_constraint
        )
    except ObjectGenerationError as e:
        # In a recursive context, set this at the deepest nesting level
        # only.  The copy is taken before the finally-pop runs, so the
        # diagnostic stack still includes spec_name.
        if not e.spec_name_stack:
            e.spec_name_stack = spec_name_stack[:]  # a copy, to be safe
        raise
    finally:
        spec_name_stack.pop()

    return sdo_dict
def generate_from_spec(
    self, spec, expected_type=None, spec_name_stack=None,
    value_constraint=None
):
    """
    Generate a value based on the given specification, which need not exist
    under any particular name in this generator's registry.

    :param spec: The specification, as parsed JSON
    :param expected_type: If the spec should be for a particular JSON type,
        that type.  If it doesn't matter, pass None.
    :param spec_name_stack: A stack of previously-visited specification
        names, used for reference loop detection.  Pass None to start a
        new stack.
    :param value_constraint: A ValueConstraint instance representing some
        additional constraint to be honored by the generator.  This is
        derived from a value co-constraint expression.  If None, there is
        no additional constraint.
    :return: The generated value
    :raises stix2generator.exceptions.UnrecognizedJSONTypeError: If given a
        non-const dict spec whose declared type is not recognized as a
        JSON type, or if expected_type is given and not a recognized JSON
        type.
    :raises stix2generator.exceptions.TypeMismatchError: If expected_type is
        given and the spec type doesn't match.
    :raises stix2generator.exceptions.ObjectGenerationError: For various
        ways the given spec is invalid.  Other types of errors are also
        wrapped/chained from this exception type (if possible) so that
        we get decoration with extra info from higher stack frames, which
        is useful for diagnosing where those problems occur.
    """
    spec_type = _get_spec_type(spec)

    if expected_type:
        if expected_type not in _JSON_TYPES:
            raise UnrecognizedJSONTypeError(expected_type)
        # There really should be some flexibility for numeric types: if
        # number is expected, integers should be accepted too...
        if spec_type != expected_type:
            raise TypeMismatchError(
                expected_type, spec_type
            )

    # If not a dict, the spec IS the desired value.  It's an easy way to
    # produce fixed values.
    if not isinstance(spec, dict):
        value = spec

    # The other way: use "const", like in json-schema.
    elif "const" in spec:
        value = spec["const"]

    else:
        # A dict spec: dispatch on whether it names a semantic.
        semantic_name = spec.get(
            stix2generator.generation.semantics.SEMANTIC_PROPERTY_NAME
        )
        try:
            if semantic_name:
                value = self.__generate_semantic(spec, value_constraint)
            else:
                value = self.__generate_plain(
                    spec, spec_name_stack, value_constraint
                )
        except ObjectGenerationError as e:
            # In a recursive context, set this at the deepest nesting
            # level only.  Also, I think it's better to use the semantic
            # name as the type name in error messages, for semantic specs.
            if not e.spec_type:
                e.spec_type = semantic_name or spec_type
            raise
        except Exception as e:
            # Wrap any non-generation error so higher frames can decorate
            # it with spec-name/type context; chain the original cause.
            raise ObjectGenerationError(
                "An error occurred during generation: {}: {}".format(
                    type(e).__name__, str(e)
                ),
                semantic_name or spec_type
            ) from e

    return value
def __generate_semantic(self, spec, value_constraint):
    """
    Generate from a semantic-type spec.

    :param spec: The spec
    :param value_constraint: A ValueConstraint instance representing some
        additional constraint to be honored by the generator, derived
        from a value co-constraint expression; None means unconstrained.
    :return: The generated value
    :raises stix2generator.exceptions.SemanticValueTypeMismatchError: If the
        semantic produces a value which doesn't agree with the spec's
        declared type.
    :raises stix2generator.exceptions.ObjectGenerationError: If the
        semantic name isn't found in any of this generator's semantic
        providers
    """
    semantic = spec[
        stix2generator.generation.semantics.SEMANTIC_PROPERTY_NAME
    ]

    # Guard clause: unknown semantic names are a spec error.
    if semantic not in self.__semantics:
        raise ObjectGenerationError(
            "unrecognized semantic: " + semantic
        )

    provider = self.__semantics[semantic]
    value = provider.create_semantic(spec, self, value_constraint)

    # Sanity-check that the provider produced a value of the spec's
    # declared type.
    actual_type = _json_type_from_python_type(type(value))
    declared_type = spec["type"]
    if actual_type != declared_type:
        raise SemanticValueTypeMismatchError(
            semantic, actual_type, value, declared_type
        )

    return value
def __generate_plain(self, spec, spec_name_stack, value_constraint):
    """
    Generate from a "plain" spec, i.e. one which is not a semantic spec
    (though its sub-specs or referenced specs may be semantic).

    :param spec: The "plain" spec
    :param spec_name_stack: Spec name stack, to propagate through
        references (if any)
    :param value_constraint: A ValueConstraint instance representing some
        additional constraint to be honored by the generator, derived
        from a value co-constraint expression; None means unconstrained.
        This is *only* propagated through ref/oneOf specifications; the
        plain (non-semantic) generators ignore constraints.
    :return: The generated value
    :raises stix2generator.exceptions.ObjectGenerationError: If a
        generation error occurs
    """
    declared_type = spec["type"]

    if "ref" in spec:
        # Delegate to the named spec, propagating the type expectation
        # and constraint through the reference.
        return self.generate(
            spec["ref"],
            expected_type=declared_type,
            spec_name_stack=spec_name_stack,
            value_constraint=value_constraint
        )

    if "oneOf" in spec:
        # "oneOf" holds a list of alternative specs; pick one at random.
        chosen_spec = random.choice(spec["oneOf"])
        return self.generate_from_spec(
            chosen_spec,
            expected_type=declared_type,
            spec_name_stack=spec_name_stack,
            value_constraint=value_constraint
        )

    # Dispatch to the type-specific generator method (constraint unused).
    generator = self.__GENERATOR_METHOD_MAP[declared_type]
    return generator(self, spec, spec_name_stack)
def generate_object(self, object_spec, spec_name_stack=None):
    """
    Generate a JSON object from the given specification.

    :param object_spec: A JSON object specification
    :param spec_name_stack: Specification name stack, for reference loop
        detection.  If None, use an empty stack.
    :return: A dict
    :raises stix2generator.exceptions.ObjectGenerationError: If a
        generation error occurs
    """
    # Handle imports: start from a generated instance of the imported
    # spec; locally-defined properties are layered on top below.
    if "import" in object_spec:
        gen_object = self.generate(
            object_spec["import"],
            expected_type="object",
            spec_name_stack=spec_name_stack
        )
    else:
        gen_object = {}

    # Decide which properties to include, honoring required/optional
    # settings and presence co-constraints.
    names_to_include = _get_properties_to_include(
        object_spec, self.config.optional_property_probability,
        self.config.minimize_ref_properties
    )

    if not names_to_include:
        return gen_object

    # Find values for the included properties, according to their specs
    # and value co-constraints.
    value_coconstraints = _get_value_coconstraints(object_spec)

    # At this point, if there were any names to include, there must
    # have been some properties defined!
    prop_specs = object_spec["properties"]
    for prop_name in names_to_include:
        # Derive a constraint from co-constraints if one applies.
        constraint = _get_value_constraint(
            prop_name, value_coconstraints, gen_object
        )
        gen_object[prop_name] = self.generate_from_spec(
            prop_specs[prop_name], spec_name_stack=spec_name_stack,
            value_constraint=constraint
        )

    return gen_object
def generate_array(self, array_spec, spec_name_stack=None):
    """
    Generate a JSON array from the given specification.

    :param array_spec: A JSON array specification
    :param spec_name_stack: Specification name stack, for reference loop
        detection.  If None, use an empty stack.
    :return: A list
    :raises stix2generator.exceptions.ObjectGenerationError: If a
        generation error occurs
    """
    item_spec = array_spec["items"]

    # minItems/maxItems must be given together or not at all.
    if ("minItems" in array_spec) != ("maxItems" in array_spec):
        raise ObjectGenerationError(
            "Specification must include both or neither of the properties: "
            "minItems, maxItems",
            "array"
        )

    min_items = array_spec.get("minItems", self.config.array_length_min)
    max_items = array_spec.get("maxItems", self.config.array_length_max)

    if min_items > max_items:
        raise ObjectGenerationError(
            "minItems must be less than or equal to maxItems",
            "array"
        )
    if min_items < 0 or max_items < 0:
        raise ObjectGenerationError(
            "minItems and maxItems must be non-negative: {}".format(
                min_items if min_items < 0 else max_items
            ),
            "array"
        )

    # Pick a length within bounds, then generate each element from the
    # same item spec.
    length = random.randint(min_items, max_items)
    return [
        self.generate_from_spec(item_spec, spec_name_stack=spec_name_stack)
        for _ in range(length)
    ]
[docs] def generate_string(self, string_spec, spec_name_stack=None):
"""
Generate a string from the given specification.
:param string_spec: A string specification
:param spec_name_stack: A specification name stack, for reference loop
detection. Unused but included for API compatibility with
object/array generators.
:return: A string
:raises stix2generator.exceptions.ObjectGenerationError: If a
generation error occurs
"""
has_min = "minLength" in string_spec
has_max = "maxLength" in string_spec
if (has_min and not has_max) or (not has_min and has_max):
raise ObjectGenerationError(
"Specification must include both or neither of the properties: "
"minLength, maxLength",
"string"
)
min_length = string_spec.get("minLength", self.config.string_length_min)
max_length = string_spec.get("maxLength", self.config.string_length_max)
if min_length > max_length:
raise ObjectGenerationError(
"minLength must be less than or equal to maxLength: {} <= {}"
.format(min_length, max_length),
"string"
)
if min_length < 0 or max_length < 0:
raise ObjectGenerationError(
"minLength and maxLength must be non-negative: {}".format(
min_length if min_length < 0 else max_length
),
"string"
)
s = "".join(
random.choice(self.config.string_chars)
for _ in range(
random.randint(min_length, max_length)
)
)
return s
def generate_integer(self, integer_spec, spec_name_stack=None):
    """
    Generate an integer from the given specification.

    :param integer_spec: An integer specification
    :param spec_name_stack: A specification name stack, for reference loop
        detection.  Unused but included for API compatibility with
        object/array generators.
    :return: An int
    :raises stix2generator.exceptions.ObjectGenerationError: If a
        generation error occurs
    """
    min_, is_min_exclusive, max_, is_max_exclusive = \
        _process_numeric_min_max_properties(
            integer_spec,
            self.config.number_min,
            self.config.is_number_min_exclusive,
            self.config.number_max,
            self.config.is_number_max_exclusive
        )

    # The bounds need not be expressed as ints (the min/max processing
    # above doesn't require it), so tighten each bound to the nearest
    # integer inside the interval.
    if min_ == int(min_):
        # Integral lower bound: exclusivity bumps it up by one.
        min_ = int(min_) + (1 if is_min_exclusive else 0)
    else:
        # Non-integral: the ceiling is already strictly above the bound.
        min_ = int(math.ceil(min_))

    if max_ == int(max_):
        # Integral upper bound: exclusivity bumps it down by one.
        max_ = int(max_) - (1 if is_max_exclusive else 0)
    else:
        # Non-integral: the floor is already strictly below the bound.
        max_ = int(math.floor(max_))

    if min_ > max_:
        raise ObjectGenerationError(
            "no integers exist in the specified interval",
            "integer"
        )

    return random.randint(min_, max_)
def generate_number(self, number_spec, spec_name_stack=None):
    """
    Generate a number (float) from the given specification.

    :param number_spec: A number specification
    :param spec_name_stack: A specification name stack, for reference loop
        detection.  Unused but included for API compatibility with
        object/array generators.
    :return: A float
    :raises stix2generator.exceptions.ObjectGenerationError: If a
        generation error occurs
    """
    min_, is_min_exclusive, max_, is_max_exclusive = \
        _process_numeric_min_max_properties(
            number_spec,
            self.config.number_min,
            self.config.is_number_min_exclusive,
            self.config.number_max,
            self.config.is_number_max_exclusive
        )

    # Choose the sampler matching the open/closed-ness of each end of
    # the interval, then draw from it.
    if is_min_exclusive:
        sampler = _random_open if is_max_exclusive \
            else _random_half_open_lower
    else:
        sampler = _random_half_open_upper if is_max_exclusive \
            else _random_closed

    return sampler(min_, max_)
def generate_boolean(self, boolean_spec, spec_name_stack=None):
    """
    Generate a boolean from the given specification.

    :param boolean_spec: A boolean specification (ignored; there's nothing
        to configure for now)
    :param spec_name_stack: A specification name stack, for reference loop
        detection.  Unused but included for API compatibility with
        object/array generators.
    :return: True or False
    """
    # Fair coin flip: a single draw from random(), compared against 0.5
    # (consumes exactly one random number, as before).
    return random.random() < 0.5
def generate_null(self, null_spec, spec_name_stack=None):
    """
    Generate null (None).

    :param null_spec: A null specification (ignored; there's nothing
        to configure for now)
    :param spec_name_stack: A specification name stack, for reference loop
        detection.  Unused but included for API compatibility with
        object/array generators.
    :return: None
    """
    # JSON null maps to Python None; nothing is configurable here.
    return None
# This has to be at the bottom, after the methods are defined.  Would it
# have been better to store method names and use getattr() to get the
# methods instead?  Or generate a function name from a template?  This is
# yet another map to keep sync'd up with others.  Think about ways of
# improving this situation...
#
# Maps a JSON type name to the (unbound) method which generates values of
# that type; entries are invoked as generator(self, spec, spec_name_stack).
__GENERATOR_METHOD_MAP = {
    "object": generate_object,
    "array": generate_array,
    "string": generate_string,
    "integer": generate_integer,
    "number": generate_number,
    "boolean": generate_boolean,
    "null": generate_null
}
def _get_logger():
    """
    Get this module's logger, creating and caching it on first use.

    :return: A logging.Logger named after this module
    """
    global _log
    if _log is None:
        _log = logging.getLogger(__name__)
    return _log
def _get_value_coconstraints(object_spec):
    """
    Get the value coconstraints, if any, from the given object specification.
    This also does some error checking.

    :param object_spec: The object specification whose value coconstraints
        should be checked.
    :return: A list of ValueCoconstraint objects; will be empty if there are
        none defined.
    :raises stix2generator.exceptions.ValueCoconstraintError: If an
        invalid value co-constraint is found.
    """
    # This function shouldn't be called if object_spec has no properties, but
    # just in case...
    assert "properties" in object_spec

    value_coconstraints = object_spec.get("value-coconstraints", [])
    prop_specs = object_spec["properties"]

    coconstraint_objs = []
    for coconstraint in value_coconstraints:
        coconstraint_obj = \
            stix2generator.generation.constraints.make_value_coconstraint(
                coconstraint
            )
        coconstraint_objs.append(coconstraint_obj)

        # Both referenced properties must be defined in the specification
        # (checked left first, then right).
        for prop_name in (
            coconstraint_obj.prop_name_left,
            coconstraint_obj.prop_name_right
        ):
            if prop_name not in prop_specs:
                raise ValueCoconstraintError(
                    coconstraint,
                    "Property '{}' undefined in specification".format(
                        prop_name
                    )
                )

    # Another scan through the coconstraints to check for properties
    # referenced more than once; Counter replaces the previous hand-rolled
    # setdefault-based counting.
    prop_occurrence_counts = collections.Counter(
        itertools.chain.from_iterable(
            (obj.prop_name_left, obj.prop_name_right)
            for obj in coconstraint_objs
        )
    )
    props_with_count_gt_1 = [
        prop_name for prop_name, count in prop_occurrence_counts.items()
        if count > 1
    ]
    if props_with_count_gt_1:
        log = _get_logger()
        log.warning(
            "Some properties are referenced in more than one value"
            " co-constraint. If such a property requires constraining based on"
            " another property value, only the first such co-constraint will be"
            " consulted: %s", ", ".join(props_with_count_gt_1)
        )

    return coconstraint_objs
def _check_property_groups(groups_spec, property_specs):
    """
    Do some sanity checks on the given property groups: empty groups,
    bad property names, naming conflicts, etc.  Runs for side-effects
    (exceptions) and doesn't return anything.

    :param groups_spec: The groups spec from a presence coconstraint
        specification from an object specification
    :param property_specs: The properties specifications from an object
        specification
    :raises stix2generator.exceptions.InvalidPropertyGroupError: If
        there is a problem with a property group
    """
    # Per-group checks: non-empty, no name clash with a property, and all
    # member properties defined.
    for group_name, member_names in groups_spec.items():
        if not member_names:
            raise InvalidPropertyGroupError(
                group_name, "group can't be empty"
            )
        if group_name in property_specs:
            raise InvalidPropertyGroupError(
                group_name, "group name conflicts with a property name"
            )
        missing = set(member_names) - property_specs.keys()
        if missing:
            raise InvalidPropertyGroupError(
                group_name, 'undefined property(s): {}'.format(
                    ", ".join(missing)
                )
            )

    # Check pairwise intersections to ensure all groups are disjoint
    # (combinations() yields nothing for fewer than two groups).
    for group1, group2 in itertools.combinations(groups_spec, 2):
        common = set(groups_spec[group1]) & set(groups_spec[group2])
        if common:
            raise InvalidPropertyGroupError(
                group2,
                'overlaps with group "{}". Overlapping properties: {}'
                .format(
                    group1,
                    ", ".join(common)
                )
            )
def _get_group_coconstraints(presence_coconstraints, property_specs):
    """
    Get property group co-constraints from the presence co-constraint
    part of an object specification.

    Length 1 property groups will have some sanity checking done, but
    will otherwise be ignored.  That is better accomplished by using the
    property directly instead of putting it in its own group.

    :param presence_coconstraints: The presence co-constraints dict from an
        object specification.
    :param property_specs: The property definitions dict from an object
        specification.
    :return: A mapping of group name to
        stix2generator.generation.constraints.PresenceCoconstraint object
        representing the group co-constraint.  If there were no property
        groups defined, the map will be empty.
    :raises stix2generator.exceptions.PresenceCoconstraintError: If an invalid
        presence co-constraint is found
    """
    # Should map group names to lists of property names (group contents)
    group_specs = presence_coconstraints.get("property-groups", {})

    # Sanity check groups
    _check_property_groups(group_specs, property_specs)

    # Group names listed under each co-constraint type, keyed by the type
    # name used when constructing PresenceCoconstraint objects below.
    constraint_type_groups = {
        "all": set(presence_coconstraints.get("all", [])),
        "one": set(presence_coconstraints.get("one", [])),
        "at-least-one": set(presence_coconstraints.get("at-least-one", []))
    }

    # Sanity check for bad group names in constraint type lists
    for group_name in itertools.chain.from_iterable(
        constraint_type_groups.values()
    ):
        if group_name not in group_specs:
            raise PresenceCoconstraintError(
                "Group not found: " + group_name
            )

    # Ensure no group is assigned more than one constraint type
    constraint_conflicts = set()
    for groups1, groups2 in itertools.combinations(
        constraint_type_groups.values(), 2
    ):
        constraint_conflicts |= groups1 & groups2
    if constraint_conflicts:
        raise PresenceCoconstraintError(
            "Property group(s) have conflicting co-constraints: {}".format(
                ", ".join(constraint_conflicts)
            )
        )

    # Require every group to be explicitly assigned a constraint type
    # (rather than defining some default).
    unassigned_groups = group_specs.keys() - set().union(
        *constraint_type_groups.values()
    )
    if unassigned_groups:
        raise PresenceCoconstraintError(
            "Property group(s) were not assigned co-constraint types: "
            "{}".format(
                ", ".join(unassigned_groups)
            )
        )

    def constraint_type_of(group_name):
        # The conflict check above guarantees a group appears under
        # exactly one constraint type.
        for type_name, group_names in constraint_type_groups.items():
            if group_name in group_names:
                return type_name

    # Build the constraint objects, skipping length-1 groups: they passed
    # the checks above (to avoid silly specifications) but are ignored
    # from here on.
    return {
        group_name:
            stix2generator.generation.constraints.PresenceCoconstraint(
                property_names,
                constraint_type_of(group_name)
            )
        for group_name, property_names in group_specs.items()
        if len(property_names) > 1
    }
def _get_dependency_coconstraints(presence_coconstraints, group_coconstraints,
                                  property_specs):
    """
    Get dependency co-constraints, which is part of the presence co-constraints.
    This just does a lot of sanity checks on the dependencies object from the
    spec.

    :param presence_coconstraints: The presence co-constraints dict from an
        object specification
    :param group_coconstraints: The group co-constraint mapping, as produced
        by _get_group_coconstraints()
    :param property_specs: The property definitions dict from an object
        specification
    :return: The dependency co-constraints object.  If none was given, returns
        an empty dict.
    :raises stix2generator.exceptions.PresenceCoconstraintError: If an invalid
        presence co-constraint is found
    """
    deps = presence_coconstraints.get("dependencies", {})

    # All property names which belong to some group.
    grouped_property_names = {
        prop_name
        for coco in group_coconstraints.values()
        for prop_name in coco.property_names
    }

    def check_name(name):
        # A dependency participant must be a defined property or group...
        if name not in property_specs and name not in group_coconstraints:
            raise PresenceCoconstraintError(
                "Unrecognized group or property: " + name
            )
        # ...and grouped properties may only be referenced via their group.
        if name in grouped_property_names:
            raise PresenceCoconstraintError(
                'Property "{}" is grouped and cannot be referenced'
                ' individually'.format(name)
            )

    for key_name, dependent_names in deps.items():
        check_name(key_name)
        for name in dependent_names:
            check_name(name)
            # A name may not be both a dependency key and appear in a
            # dependency value list.
            if name in deps:
                raise PresenceCoconstraintError(
                    "Dependency key can't also occur in a dependency"
                    " value: " + name
                )

    return deps
def _get_presence_coconstraints(object_spec):
    """
    Get presence co-constraint info from the given object specification.
    This includes the groups and dependencies.

    :param object_spec: The object specification
    :return: A 2-tuple with (a) the group co-constraint mapping from group
        name to constraint object, and (b) the dependencies mapping from
        property or group name to list of properties/groups.
    :raises stix2generator.exceptions.PresenceCoconstraintError: If an invalid
        presence co-constraint is found
    """
    presence_spec = object_spec.get("presence-coconstraints", {})
    property_specs = object_spec.get("properties", {})

    groups = _get_group_coconstraints(presence_spec, property_specs)
    dependencies = _get_dependency_coconstraints(
        presence_spec, groups, property_specs
    )

    return groups, dependencies
def _get_properties_to_include(
    object_spec, optional_property_probability, minimize_ref_properties
):
    """
    Determine which object properties to include, based on required/optional
    choices and any defined presence co-constraints.

    :param object_spec: The object spec
    :param optional_property_probability: The probability an optional property
        should be included.  Must be a number from 0 to 1.
    :param minimize_ref_properties: True if we should minimize optional
        reference properties.  False if they should receive no special
        treatment.
    :return: The property names, as a set of strings
    :raises stix2generator.exceptions.PresenceCoconstraintError: If an invalid
        presence co-constraint is found
    :raises stix2generator.exceptions.UndefinedPropertyError: If a reference to an
        undefined property or group is found in the "required" or "optional"
        property value of the spec
    :raises stix2generator.exceptions.ObjectGenerationError: If a reference to a
        grouped property is found
    """
    prop_specs = object_spec.get("properties", {})
    required_names = object_spec.get("required")
    optional_names = object_spec.get("optional")

    # Exactly one of "required"/"optional" (or neither) may be given; the
    # other is implied as the complement.
    if required_names is not None and optional_names is not None:
        raise ObjectGenerationError(
            '"required" and "optional" can\'t both be present'
        )

    # If neither optional nor required names are specified, all
    # properties/groups will be required.
    elif required_names is None and optional_names is None:
        # empty optional set = all required
        optional_names = set()

    # Convert to sets to remove dupes (only one of these branches runs,
    # so the other name stays None and acts as the "not given" marker).
    elif required_names is not None:
        required_names = set(required_names)

    elif optional_names is not None:
        optional_names = set(optional_names)

    group_coconstraints, dependency_coconstraints = \
        _get_presence_coconstraints(object_spec)

    # Detect errors in the required/optional prop list: all must be
    # defined, and grouped properties must not be referenced
    req_or_opt = required_names if required_names is not None \
        else optional_names

    defined_prop_names = prop_specs.keys()
    defined_group_names = group_coconstraints.keys()
    grouped_property_names = set(
        itertools.chain.from_iterable(
            coco.property_names
            for coco in group_coconstraints.values()
        )
    )

    undef_name_errors = req_or_opt - defined_prop_names - defined_group_names
    if undef_name_errors:
        raise UndefinedPropertyError(undef_name_errors)

    grouped_prop_errors = req_or_opt & grouped_property_names
    if grouped_prop_errors:
        raise ObjectGenerationError(
            "Property(s) are grouped and cannot be referenced"
            " individually: {}".format(
                ", ".join(
                    "{}".format(p) for p in grouped_prop_errors
                )
            )
        )

    # Include all ungrouped property names and property group names in
    # the same "pool" of names one can specify as required or optional.
    name_pool = (defined_prop_names - grouped_property_names) \
        | defined_group_names

    # Get set of optional names (whether they specified "required" or
    # "optional" in the spec).
    effectively_optional_names = optional_names if optional_names is not None \
        else name_pool - required_names

    # Start out the set of names to include with all required ones.
    names_to_include = required_names if required_names is not None \
        else name_pool - effectively_optional_names

    # And then maybe add some optional ones.
    for name in effectively_optional_names:
        is_group = name in defined_group_names
        is_ref = name.endswith("_ref") or name.endswith("_refs")
        can_include = False
        if minimize_ref_properties:
            if is_group:
                # A group is includable only if its co-constraint can be
                # satisfied without reference properties.
                if group_coconstraints[name].can_satisfy_without_refs():
                    can_include = True
            elif not is_ref:
                can_include = True
        else:
            can_include = True
        if can_include and random.random() < optional_property_probability:
            names_to_include.add(name)

    # Incorporate the "dependencies": add any other properties we
    # require
    for dep_key, dep_names in dependency_coconstraints.items():
        if dep_key in names_to_include:
            names_to_include.update(dep_names)

    # For any names which are property groups, expand them to the
    # component properties according to their co-constraints
    # ... can't modify a set as you iterate!  So need a temp set.
    temp_set = set()
    for name in names_to_include:
        if name in group_coconstraints:
            temp_set.update(
                group_coconstraints[name].choose_properties(
                    optional_property_probability,
                    minimize_ref_properties
                )
            )
        else:
            temp_set.add(name)
    names_to_include = temp_set

    return names_to_include
def _get_value_constraint(prop_name, coconstraints, partially_generated_object):
"""
Get a value constraint object derived from a value co-constraint involving
the given property.
:param prop_name: The property to check for co-constraints
:param coconstraints: An iterable of co-constraint objects
(stix2generator.generation.constraints.ValueCoconstraint)
:param partially_generated_object: The object being generated, in its
current state of partial construction. This is necessary to find the
value of the other property involved in the co-constraint, if any.
:return: A constraint object
(stix2generator.generation.constraints.ValueConstraint) representing a
necessary constraint on the given property, or None if no constraint is
necessary.
"""
constraint = None
for coconstraint in coconstraints:
if coconstraint.involves_property(prop_name):
other_prop_name = coconstraint.get_other_property(
prop_name
)
if other_prop_name in partially_generated_object:
constraint = coconstraint.get_constraint(
other_prop_name,
partially_generated_object[other_prop_name]
)
break
return constraint
def _json_type_from_python_type(python_type):
    """
    Infers a JSON type from a python type.  This is necessary for reference
    verification when the type isn't explicitly given (e.g. a "const" spec).

    :param python_type: The python type (a 'type' object)
    :return: A JSON type name
    :raises stix2generator.exceptions.ObjectGenerationError: If a JSON type
        can't be inferred from python_type
    """
    # _JSON_TYPE_MAP never maps to None, so None reliably signals a miss.
    inferred = _JSON_TYPE_MAP.get(python_type)
    if inferred is not None:
        return inferred
    raise ObjectGenerationError(
        "Can't infer JSON type from " + str(python_type)
    )
def _get_spec_type(spec):
    """
    Determine the type of the given spec, as one of the supported JSON
    types.

    :param spec: A specification
    :return: A spec JSON type as a string, e.g. "string", "array", etc.
    :raises stix2generator.exceptions.ObjectGenerationError: If a const spec
        where the spec type can't be inferred from the constant; if a
        non-const spec whose "type" property is missing
    :raises stix2generator.exceptions.UnrecognizedJSONTypeError: if the value
        of the "type" property isn't a recognized JSON type
    """
    if not isinstance(spec, dict):
        # A bare (non-dict) spec is itself the value; infer the type from it.
        return _json_type_from_python_type(type(spec))

    if "const" in spec:
        # type is implied by the value of the "const" property.
        return _json_type_from_python_type(type(spec["const"]))

    if "type" in spec:
        json_type = spec["type"]
        if json_type not in _JSON_TYPES:
            raise UnrecognizedJSONTypeError(json_type)
        return json_type

    raise ObjectGenerationError(
        '"type" property is missing'
    )
def _process_numeric_min_max_properties(
spec,
default_min,
is_default_min_exclusive,
default_max,
is_default_max_exclusive
):
"""
Factors out a rather large chunk of code for validating and processing
the min/max properties on numbers and integers. Maybe we need a
JSON-Schema for specifications and validate against that, to reduce the
amount of hand-written validation code we need to write...
:param spec: A number or integer spec
:param default_min: If the spec doesn't specify a minimum, use this as
the default.
:param is_default_min_exclusive: Whether default_min, if it is used,
is an exclusive bound.
:param default_max: If the spec doesn't specify a maximum, use this as
the default.
:param is_default_max_exclusive: Whether default_max, if it is used,
is an exclusive bound.
:return: A (num, bool, num, bool) 4-tuple giving the bounds and whether
each bound is exclusive or not:
(min, is_min_exclusive, max, is_max_exclusive)
:raises stix2generator.exceptions.ObjectGenerationError: For various types
of problems with numeric specifications
"""
if "minimum" in spec and "exclusiveMinimum" in spec:
raise ObjectGenerationError(
"minimum and exclusiveMinimum can't both be present"
)
if "maximum" in spec and "exclusiveMaximum" in spec:
raise ObjectGenerationError(
"maximum and exclusiveMaximum can't both be present"
)
min_given = any(
p in spec
for p in ("minimum", "exclusiveMinimum")
)
max_given = any(
p in spec
for p in ("maximum", "exclusiveMaximum")
)
# I think this check is necessary since user-specified min/max could well
# be out of order w.r.t. defaults, producing unexpected errors. What would
# users expect the other bound to be anyway, if they only gave one bound?
if (min_given and not max_given) or (max_given and not min_given):
raise ObjectGenerationError(
"can't give minimum without a maximum, or vice versa"
)
if "minimum" in spec:
min_ = spec["minimum"]
is_min_exclusive = False
elif "exclusiveMinimum" in spec:
min_ = spec["exclusiveMinimum"]
is_min_exclusive = True
else:
min_ = default_min
is_min_exclusive = is_default_min_exclusive
if "maximum" in spec:
max_ = spec["maximum"]
is_max_exclusive = False
elif "exclusiveMaximum" in spec:
max_ = spec["exclusiveMaximum"]
is_max_exclusive = True
else:
max_ = default_max
is_max_exclusive = is_default_max_exclusive
if min_ > max_:
raise ObjectGenerationError(
"minimum can't be greater than maximum"
)
elif min_ == max_ and (is_max_exclusive or is_min_exclusive):
raise ObjectGenerationError(
"In an open or half-open interval, minimum must be strictly "
"less than maximum"
)
return min_, is_min_exclusive, max_, is_max_exclusive
def _random_half_open_upper(min_, ex_max):
assert min_ < ex_max
# easy case... I think, since random.random() already has the right
# openness.
n = min_ + (ex_max - min_) * random.random()
return n
def _random_half_open_lower(ex_min, max_):
    """
    Generate a uniform random number in the half-open interval (ex_min, max_]
    (i.e. the lower bound is excluded).
    """
    assert ex_min < max_
    # Draw from the interval with the opposite openness, [ex_min, max_), and
    # reflect the result about the interval's midpoint.  Reflection turns
    # [a, b) into (a, b] while preserving the range and uniformity; the
    # ex_min + max_ - x formula performs that flip-and-shift in one step.
    return ex_min + max_ - _random_half_open_upper(ex_min, max_)
def _random_closed(min_, max_):
assert min_ <= max_
# Python gives us a simple API which is documented to use a closed
# interval, but the same docs say that one endpoint may or may not actually
# be included... so maybe this doesn't actually work? It actually quotes
# the exact same equation I used in _random_half_open_upper()!
return random.uniform(min_, max_)
def _random_open(ex_min, ex_max):
    """
    Generate a random number in the open interval (ex_min, ex_max)
    (both bounds excluded).

    There's no obvious way to sample an open interval uniformly.  The scheme
    here splits the interval at its midpoint into two half-open pieces,
    (ex_min, mid] and [mid, ex_max), and picks one with equal probability.
    The midpoint belongs to both pieces and is therefore very slightly
    over-weighted, so the result isn't perfectly uniform — but it's close
    enough for generation purposes.
    """
    assert ex_min < ex_max
    mid = (ex_min + ex_max) / 2.0
    if random.random() < 0.5:
        return _random_half_open_lower(ex_min, mid)
    return _random_half_open_upper(mid, ex_max)
# Another possibility I found on stackoverflow was essentially to generate
# a random positive integer (1 to something big, to give a lot of possible
# distinct generated numbers) and divide by a number slightly larger than
# the maximum. https://stackoverflow.com/a/19934205