Source code for stix2generator.generation.semantics

import datetime
import hashlib
import logging
import random
import uuid

import pytz

import stix2generator.exceptions
import stix2generator.generation.pattern_generator
import stix2generator.generation.reference_graph_generator
import stix2generator.utils

# The property which names semantics.  Is "semantic" or "semantics" better?

[docs]class SemanticsProvider: """ Abstract base class for semantics providers. Instances generate data given an abstract semantic name and some additional settings. NOTE: An implementation is provided the generator it was invoked through so that it may leverage auto-generation to create parts of its values. Avoid invoking a named specification from the given generator (e.g. via the "generate" method). To be reusable across generators, a provider can't make assumptions as to which named specifications exist. Also, you don't know what context the semantic is being invoked within: if the named specification is already in the midst of generation, you will cause infinite recursion. Semantics implementations should only generate from specifications which don't contain any references. """ def __init__(self): # A logger for semantics providers, lazy-initialized self.__logger = None @property def logger(self): """ Convenience property to get a logger named after the instance's module and class name. :return: A logger instance """ if not self.__logger: cls = self.__class__ self.__logger = logging.getLogger( cls.__module__ + "." + cls.__name__ ) return self.__logger
[docs] def get_semantics(self): """ Get a list of the semantics provided by the provider. Subclasses must override this. :return: The list of semantics (list of strings which are the names of the semantics). """ raise NotImplementedError()
[docs] def create_semantic(self, spec, generator, constraint=None): """ Create a value according to the given semantic spec. For convenience, this default implementation tries to invoke a method named the same as the semantic name, except if the name contains hyphens. If it contains hyphens, they are translated to underscores first, so it is easier to define the methods. (Can't easily have a method name with a hyphen.) Subclasses can override this to do something else. :param spec: The semantic spec, which must be a mapping with a "semantics" property giving the name of the semantic. Of course, that name should be one of the names returned by get_semantics()! Other properties may be used by the implementation, e.g. to influence how generation is done. :param generator: The generator through which this provider was invoked. This allows implementations to delegate to other specifications as needed. (But see the note in the class docstring.) :param constraint: A ValueConstraint instance representing some additional constraint to be honored by the generator. This is derived from a co-constraint expression. If None, there is no additional constraint. :return: The generated value """ semantic = spec[SEMANTIC_PROPERTY_NAME].replace("-", "_") m = getattr(self, semantic) return m(spec, generator, constraint)
[docs]class FakerSemantics(SemanticsProvider): """ A semantics provider which uses a faker. The semantics names are taken directly from the faker object's providers' method names. You can pass arguments into the faker method by adding additional properties to the specification. They will be passed as keyword args. """ def __init__(self, faker, faker_names=None): """ Initialize this semantics provider. :param faker: The faker object to use :param faker_names: Iterable of names of faker methods to use from the given faker object, or None. If None, the faker object's installed providers are scanned; this can result in a *lot* of names and a greater probability of semantic name collisions among semantics providers. """ super().__init__() self.__faker = faker if faker_names: self.__semantics = list(faker_names) # Verify these names are valid faker methods? for name in self.__semantics: m = getattr(faker, name, None) if m is None: # Or should this cause an exception...? self.logger.warning( "Faker function not found: %s", name ) else: # This is how faker finds methods from providers, as far as I could # tell from code inspection. Faker also makes the methods # available directly on the faker object, but I still need to go # through the providers since a faker object has other methods # besides, and I wouldn't have a way of distinguishing. self.__semantics = [ meth for prov in faker.get_providers() for meth in dir(prov) if not meth.startswith("_") and callable(getattr(prov, meth)) ]
[docs] def get_semantics(self): return self.__semantics
[docs] def create_semantic(self, spec, generator, constraint=None): faker_kwargs = dict(spec) # shallow-copy is ok # Remove stuff that's not part of the faker args, get desired # faker semantics name del faker_kwargs["type"] faker_semantics_name = faker_kwargs.pop(SEMANTIC_PROPERTY_NAME) faker_func = getattr(self.__faker, faker_semantics_name) return faker_func(**faker_kwargs)
[docs]class STIXSemantics(SemanticsProvider): """ Some STIX-specific custom semantics. """ SEMANTICS = [ "stix-id", "stix-timestamp", "stix-pattern", "sha256", # (conflicts with a Faker function) "sha512", "sha3_256", "sha3_512", "observable-container", ] _TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ" _TIMESTAMP_FORMAT_FRAC = "%Y-%m-%dT%H:%M:%S.%fZ"
[docs] def get_semantics(self): return self.SEMANTICS
[docs] def stix_id(self, spec, generator, constraint): """ Create a STIX ID. The spec requires the "stix-type" property, which determines the "<type>--" prefix. """ if "stix-type" in spec: stix_type = spec["stix-type"] value = stix_type + "--" + str(uuid.uuid4()) else: raise stix2generator.exceptions.ObjectGenerationError( 'missing required "stix-type" property', spec[SEMANTIC_PROPERTY_NAME] ) return value
[docs] def stix_timestamp(self, spec, generator, constraint): """ Create a random STIX timestamp which honors the given constraint. """ if constraint: fmt = self._TIMESTAMP_FORMAT_FRAC if "." in constraint.value \ else self._TIMESTAMP_FORMAT constraint_dt = datetime.datetime.strptime(constraint.value, fmt) constraint_dt = constraint_dt.replace(tzinfo=pytz.utc) if constraint.operator == constraint.EQ: timestamp_dt = constraint_dt else: equal_allowed = constraint.operator in ( constraint.GE, constraint.LE ) # choose a random timestamp within a year of the constraint # timestamp...? random_duration = datetime.timedelta( seconds=random.randint( 0 if equal_allowed else 1, 60 * 60 * 24 * 365 - 1 ) ) if constraint.operator in (constraint.LT, constraint.LE): random_duration = -random_duration elif constraint.operator == constraint.NE: if random.random() < 0.5: random_duration = -random_duration # else for GE/GT, leave random_duration as-is timestamp_dt = constraint_dt + random_duration else: now_dt = # choose random within a year...? random_duration = datetime.timedelta( seconds=random.randint( 0, 60 * 60 * 24 * 365 - 1 ) ) if random.random() < 0.5: random_duration = -random_duration timestamp_dt = now_dt + random_duration timestamp_str = timestamp_dt.strftime(self._TIMESTAMP_FORMAT) return timestamp_str
[docs] def stix_pattern(self, spec, generator, constraint): """ Create a random STIX pattern. """ # Make kwargs from the spec, which we can use to create a pattern # generator Config object. So configuring pattern generation is easy: # just use properties in the object generator spec which are named the # same as the Config settings for pattern generation. config_kwargs = dict(spec) config_kwargs.pop("type") config_kwargs.pop(SEMANTIC_PROPERTY_NAME) config = stix2generator.generation.pattern_generator.Config( **config_kwargs ) patt_gen = stix2generator.generation.pattern_generator.PatternGenerator( generator, "2.1", config ) pattern = patt_gen.generate() return pattern
def _generate_hash(self, hash_function): """ Generate a random hash using the given function. """ value = hash_function(str(random.random()).encode()) return value.hexdigest()
[docs] def sha256(self, spec, generator, constraint): """ Generate a random SHA256 hash. """ return self._generate_hash(hashlib.sha256)
[docs] def sha512(self, spec, generator, constraint): """ Generate a random SHA512 hash. """ return self._generate_hash(hashlib.sha512)
[docs] def sha3_256(self, spec, generator, constraint): """ Generate a random SHA3-256 hash. """ return self._generate_hash(hashlib.sha3_256)
[docs] def sha3_512(self, spec, generator, constraint): """ Generate a random SHA3-512 hash. """ return self._generate_hash(hashlib.sha3_512)
[docs] def observable_container(self, spec, generator, constraint): """ Generate an "observable container" object. This is used as the value of the "objects" property of STIX 2.0-style observed-data objects. That property is deprecated in STIX 2.1. """ config_kwargs = dict(spec) config_kwargs.pop("type") config_kwargs.pop(SEMANTIC_PROPERTY_NAME) config = stix2generator.generation.reference_graph_generator.Config( **config_kwargs ) observable_container_generator = \ stix2generator.generation.reference_graph_generator\ .ReferenceGraphGenerator(generator, config) sco_type = stix2generator.utils.random_generatable_stix_type( generator, stix2generator.utils.STIXTypeClass.SCO ) _, container = observable_container_generator.generate(sco_type) return container