recorder.py - OpenGrok cross reference for /dports/devel/py-aws-xray-sdk/aws-xray-sdk-2.8.0/aws_xray_sdk/core/recorder.py

import copy
import json
import logging
import os
import platform
import time

from aws_xray_sdk import global_sdk_config
from aws_xray_sdk.version import VERSION
from .models.segment import Segment, SegmentContextManager
from .models.subsegment import Subsegment, SubsegmentContextManager
from .models.default_dynamic_naming import DefaultDynamicNaming
from .models.dummy_entities import DummySegment, DummySubsegment
from .emitters.udp_emitter import UDPEmitter
from .sampling.sampler import DefaultSampler
from .sampling.local.sampler import LocalSampler
from .streaming.default_streaming import DefaultStreaming
from .context import Context
from .daemon_config import DaemonConfig
from .plugins.utils import get_plugin_modules
from .lambda_launcher import check_in_lambda
from .exceptions.exceptions import SegmentNameMissingException, SegmentNotFoundException
from .utils.compat import string_types
from .utils import stacktrace

log = logging.getLogger(__name__)

TRACING_NAME_KEY = 'AWS_XRAY_TRACING_NAME'
DAEMON_ADDR_KEY = 'AWS_XRAY_DAEMON_ADDRESS'
CONTEXT_MISSING_KEY = 'AWS_XRAY_CONTEXT_MISSING'

XRAY_META = {
    'xray': {
        'sdk': 'X-Ray for Python',
        'sdk_version': VERSION
    }
}

SERVICE_INFO = {
    'runtime': platform.python_implementation(),
    'runtime_version': platform.python_version()
}


class AWSXRayRecorder(object):
    """
    A global AWS X-Ray recorder that will begin/end segments/subsegments
    and send them to the X-Ray daemon. This recorder is initialized during
    loading time so you can use::

        from aws_xray_sdk.core import xray_recorder

    in your module to access it
    """
    def __init__(self):

        self._streaming = DefaultStreaming()
        context = check_in_lambda()
        if context:
            # Special handling when running on AWS Lambda.
            self._context = context
            self.streaming_threshold = 0
            self._sampler = LocalSampler()
        else:
            self._context = Context()
            self._sampler = DefaultSampler()

        self._emitter = UDPEmitter()
        self._sampling = True
        self._max_trace_back = 10
        self._plugins = None
        self._service = os.getenv(TRACING_NAME_KEY)
        self._dynamic_naming = None
        self._aws_metadata = copy.deepcopy(XRAY_META)
        self._origin = None
        self._stream_sql = True

        if type(self.sampler).__name__ == 'DefaultSampler':
            self.sampler.load_settings(DaemonConfig(), self.context)

    def configure(self, sampling=None, plugins=None,
                  context_missing=None, sampling_rules=None,
                  daemon_address=None, service=None,
                  context=None, emitter=None, streaming=None,
                  dynamic_naming=None, streaming_threshold=None,
                  max_trace_back=None, sampler=None,
                  stream_sql=True):
        """Configure global X-Ray recorder.

        Configure needs to run before patching thrid party libraries
        to avoid creating dangling subsegment.
        :param bool sampling: If sampling is enabled, every time the recorder
            creates a segment it decides whether to send this segment to
            the X-Ray daemon. This setting is not used if the recorder
            is running in AWS Lambda. The recorder always respect the incoming
            sampling decisions regardless of this setting.
        :param sampling_rules: Pass a set of local custom sampling rules.
            Can be an absolute path of the sampling rule config json file
            or a dictionary that defines those rules. This will also be the
            fallback rules in case of centralized sampling opted-in while
            the cetralized sampling rules are not available.
        :param sampler: The sampler used to make sampling decisions. The SDK
            provides two built-in samplers. One is centralized rules based and
            the other is local rules based. The former is the default.
        :param tuple plugins: plugins that add extra metadata to each segment.
            Currently available plugins are EC2Plugin, ECS plugin and
            ElasticBeanstalkPlugin.
            If you want to disable all previously enabled plugins,
            pass an empty tuple ``()``.
        :param str context_missing: recorder behavior when it tries to mutate
            a segment or add a subsegment but there is no active segment.
            RUNTIME_ERROR means the recorder will raise an exception.
            LOG_ERROR means the recorder will only log the error and
            do nothing.
        :param str daemon_address: The X-Ray daemon address where the recorder
            sends data to.
        :param str service: default segment name if creating a segment without
            providing a name.
        :param context: You can pass your own implementation of context storage
            for active segment/subsegment by overriding the default
            ``Context`` class.
        :param emitter: The emitter that sends a segment/subsegment to
            the X-Ray daemon. You can override ``UDPEmitter`` class.
        :param dynamic_naming: a string that defines a pattern that host names
            should match. Alternatively you can pass a module which
            overrides ``DefaultDynamicNaming`` module.
        :param streaming: The streaming module to stream out trace documents
            when they grow too large. You can override ``DefaultStreaming``
            class to have your own implementation of the streaming process.
        :param streaming_threshold: If breaks within a single segment it will
            start streaming out children subsegments. By default it is the
            maximum number of subsegments within a segment.
        :param int max_trace_back: The maxinum number of stack traces recorded
            by auto-capture. Lower this if a single document becomes too large.
        :param bool stream_sql: Whether SQL query texts should be streamed.

        Environment variables AWS_XRAY_DAEMON_ADDRESS, AWS_XRAY_CONTEXT_MISSING
        and AWS_XRAY_TRACING_NAME respectively overrides arguments
        daemon_address, context_missing and service.
        """

        if sampling is not None:
            self.sampling = sampling
        if sampler:
            self.sampler = sampler
        if service:
            self.service = os.getenv(TRACING_NAME_KEY, service)
        if sampling_rules:
            self._load_sampling_rules(sampling_rules)
        if emitter:
            self.emitter = emitter
        if daemon_address:
            self.emitter.set_daemon_address(os.getenv(DAEMON_ADDR_KEY, daemon_address))
        if context:
            self.context = context
        if context_missing:
            self.context.context_missing = os.getenv(CONTEXT_MISSING_KEY, context_missing)
        if dynamic_naming:
            self.dynamic_naming = dynamic_naming
        if streaming:
            self.streaming = streaming
        if streaming_threshold is not None:
            self.streaming_threshold = streaming_threshold
        if type(max_trace_back) == int and max_trace_back >= 0:
            self.max_trace_back = max_trace_back
        if stream_sql is not None:
            self.stream_sql = stream_sql

        if plugins:
            plugin_modules = get_plugin_modules(plugins)
            for plugin in plugin_modules:
                plugin.initialize()
                if plugin.runtime_context:
                    self._aws_metadata[plugin.SERVICE_NAME] = plugin.runtime_context
                    self._origin = plugin.ORIGIN
        # handling explicitly using empty list to clean up plugins.
        elif plugins is not None:
            self._aws_metadata = copy.deepcopy(XRAY_META)
            self._origin = None

        if type(self.sampler).__name__ == 'DefaultSampler':
            self.sampler.load_settings(DaemonConfig(daemon_address),
                                       self.context, self._origin)

    def in_segment(self, name=None, **segment_kwargs):
        """
        Return a segment context manager.

        :param str name: the name of the segment
        :param dict segment_kwargs: remaining arguments passed directly to `begin_segment`
        """
        return SegmentContextManager(self, name=name, **segment_kwargs)

    def in_subsegment(self, name=None, **subsegment_kwargs):
        """
        Return a subsegment context manager.

        :param str name: the name of the subsegment
        :param dict subsegment_kwargs: remaining arguments passed directly to `begin_subsegment`
        """
        return SubsegmentContextManager(self, name=name, **subsegment_kwargs)

    def begin_segment(self, name=None, traceid=None,
                      parent_id=None, sampling=None):
        """
        Begin a segment on the current thread and return it. The recorder
        only keeps one segment at a time. Create the second one without
        closing existing one will overwrite it.

        :param str name: the name of the segment
        :param str traceid: trace id of the segment
        :param int sampling: 0 means not sampled, 1 means sampled
        """
        # Disable the recorder; return a generated dummy segment.
        if not global_sdk_config.sdk_enabled():
            return DummySegment(global_sdk_config.DISABLED_ENTITY_NAME)

        seg_name = name or self.service
        if not seg_name:
            raise SegmentNameMissingException("Segment name is required.")

        # Sampling decision is None if not sampled.
        # In a sampled case it could be either a string or 1
        # depending on if centralized or local sampling rule takes effect.
        decision = True

        # we respect the input sampling decision
        # regardless of recorder configuration.
        if sampling == 0:
            decision = False
        elif sampling:
            decision = sampling
        elif self.sampling:
            decision = self._sampler.should_trace()

        if not decision:
            segment = DummySegment(seg_name)
        else:
            segment = Segment(name=seg_name, traceid=traceid,
                              parent_id=parent_id)
            self._populate_runtime_context(segment, decision)

        self.context.put_segment(segment)
        return segment

    def end_segment(self, end_time=None):
        """
        End the current segment and send it to X-Ray daemon
        if it is ready to send. Ready means segment and
        all its subsegments are closed.

        :param float end_time: segment completion in unix epoch in seconds.
        """
        # When the SDK is disabled we return
        if not global_sdk_config.sdk_enabled():
            return

        self.context.end_segment(end_time)
        segment = self.current_segment()
        if segment and segment.ready_to_send():
            self._send_segment()

    def current_segment(self):
        """
        Return the currently active segment. In a multithreading environment,
        this will make sure the segment returned is the one created by the
        same thread.
        """

        entity = self.get_trace_entity()
        if self._is_subsegment(entity):
            return entity.parent_segment
        else:
            return entity

    def begin_subsegment(self, name, namespace='local'):
        """
        Begin a new subsegment.
        If there is open subsegment, the newly created subsegment will be the
        child of latest opened subsegment.
        If not, it will be the child of the current open segment.

        :param str name: the name of the subsegment.
        :param str namespace: currently can only be 'local', 'remote', 'aws'.
        """
        # Generating the parent dummy segment is necessary.
        # We don't need to store anything in context. Assumption here
        # is that we only work with recorder-level APIs.
        if not global_sdk_config.sdk_enabled():
            return DummySubsegment(DummySegment(global_sdk_config.DISABLED_ENTITY_NAME))

        segment = self.current_segment()
        if not segment:
            log.warning("No segment found, cannot begin subsegment %s." % name)
            return None

        if not segment.sampled:
            subsegment = DummySubsegment(segment, name)
        else:
            subsegment = Subsegment(name, namespace, segment)

        self.context.put_subsegment(subsegment)

        return subsegment

    def current_subsegment(self):
        """
        Return the latest opened subsegment. In a multithreading environment,
        this will make sure the subsegment returned is one created
        by the same thread.
        """
        if not global_sdk_config.sdk_enabled():
            return DummySubsegment(DummySegment(global_sdk_config.DISABLED_ENTITY_NAME))

        entity = self.get_trace_entity()
        if self._is_subsegment(entity):
            return entity
        else:
            return None

    def end_subsegment(self, end_time=None):
        """
        End the current active subsegment. If this is the last one open
        under its parent segment, the entire segment will be sent.

        :param float end_time: subsegment compeletion in unix epoch in seconds.
        """
        if not global_sdk_config.sdk_enabled():
            return

        if not self.context.end_subsegment(end_time):
            return

        # if segment is already close, we check if we can send entire segment
        # otherwise we check if we need to stream some subsegments
        if self.current_segment().ready_to_send():
            self._send_segment()
        else:
            self.stream_subsegments()

    def put_annotation(self, key, value):
        """
        Annotate current active trace entity with a key-value pair.
        Annotations will be indexed for later search query.

        :param str key: annotation key
        :param object value: annotation value. Any type other than
            string/number/bool will be dropped
        """
        if not global_sdk_config.sdk_enabled():
            return
        entity = self.get_trace_entity()
        if entity and entity.sampled:
            entity.put_annotation(key, value)

    def put_metadata(self, key, value, namespace='default'):
        """
        Add metadata to the current active trace entity.
        Metadata is not indexed but can be later retrieved
        by BatchGetTraces API.

        :param str namespace: optional. Default namespace is `default`.
            It must be a string and prefix `AWS.` is reserved.
        :param str key: metadata key under specified namespace
        :param object value: any object that can be serialized into JSON string
        """
        if not global_sdk_config.sdk_enabled():
            return
        entity = self.get_trace_entity()
        if entity and entity.sampled:
            entity.put_metadata(key, value, namespace)

    def is_sampled(self):
        """
        Check if the current trace entity is sampled or not.
        Return `False` if no active entity found.
        """
        if not global_sdk_config.sdk_enabled():
            # Disabled SDK is never sampled
            return False
        entity = self.get_trace_entity()
        if entity:
            return entity.sampled
        return False

    def get_trace_entity(self):
        """
        A pass through method to ``context.get_trace_entity()``.
        """
        return self.context.get_trace_entity()

    def set_trace_entity(self, trace_entity):
        """
        A pass through method to ``context.set_trace_entity()``.
        """
        self.context.set_trace_entity(trace_entity)

    def clear_trace_entities(self):
        """
        A pass through method to ``context.clear_trace_entities()``.
        """
        self.context.clear_trace_entities()

    def stream_subsegments(self):
        """
        Stream all closed subsegments to the daemon
        and remove reference to the parent segment.
        No-op for a not sampled segment.
        """
        segment = self.current_segment()

        if self.streaming.is_eligible(segment):
            self.streaming.stream(segment, self._stream_subsegment_out)

    def capture(self, name=None):
        """
        A decorator that records enclosed function in a subsegment.
        It only works with synchronous functions.

        params str name: The name of the subsegment. If not specified
        the function name will be used.
        """
        return self.in_subsegment(name=name)

    def record_subsegment(self, wrapped, instance, args, kwargs, name,
                          namespace, meta_processor):

        subsegment = self.begin_subsegment(name, namespace)

        exception = None
        stack = None
        return_value = None

        try:
            return_value = wrapped(*args, **kwargs)
            return return_value
        except Exception as e:
            exception = e
            stack = stacktrace.get_stacktrace(limit=self.max_trace_back)
            raise
        finally:
            # No-op if subsegment is `None` due to `LOG_ERROR`.
            if subsegment is not None:
                end_time = time.time()
                if callable(meta_processor):
                    meta_processor(
                        wrapped=wrapped,
                        instance=instance,
                        args=args,
                        kwargs=kwargs,
                        return_value=return_value,
                        exception=exception,
                        subsegment=subsegment,
                        stack=stack,
                    )
                elif exception:
                    subsegment.add_exception(exception, stack)

                self.end_subsegment(end_time)

    def _populate_runtime_context(self, segment, sampling_decision):
        if self._origin:
            setattr(segment, 'origin', self._origin)

        segment.set_aws(copy.deepcopy(self._aws_metadata))
        segment.set_service(SERVICE_INFO)

        if isinstance(sampling_decision, string_types):
            segment.set_rule_name(sampling_decision)

    def _send_segment(self):
        """
        Send the current segment to X-Ray daemon if it is present and
        sampled, then clean up context storage.
        The emitter will handle failures.
        """
        segment = self.current_segment()

        if not segment:
            return

        if segment.sampled:
            self.emitter.send_entity(segment)
        self.clear_trace_entities()

    def _stream_subsegment_out(self, subsegment):
        log.debug("streaming subsegments...")
        self.emitter.send_entity(subsegment)

    def _load_sampling_rules(self, sampling_rules):

        if not sampling_rules:
            return

        if isinstance(sampling_rules, dict):
            self.sampler.load_local_rules(sampling_rules)
        else:
            with open(sampling_rules) as f:
                self.sampler.load_local_rules(json.load(f))

    def _is_subsegment(self, entity):

        return (hasattr(entity, 'type') and entity.type == 'subsegment')

    @property
    def enabled(self):
        return self._enabled

    @enabled.setter
    def enabled(self, value):
        self._enabled = value

    @property
    def sampling(self):
        return self._sampling

    @sampling.setter
    def sampling(self, value):
        self._sampling = value

    @property
    def sampler(self):
        return self._sampler

    @sampler.setter
    def sampler(self, value):
        self._sampler = value

    @property
    def service(self):
        return self._service

    @service.setter
    def service(self, value):
        self._service = value

    @property
    def dynamic_naming(self):
        return self._dynamic_naming

    @dynamic_naming.setter
    def dynamic_naming(self, value):
        if isinstance(value, string_types):
            self._dynamic_naming = DefaultDynamicNaming(value, self.service)
        else:
            self._dynamic_naming = value

    @property
    def context(self):
        return self._context

    @context.setter
    def context(self, cxt):
        self._context = cxt

    @property
    def emitter(self):
        return self._emitter

    @emitter.setter
    def emitter(self, value):
        self._emitter = value

    @property
    def streaming(self):
        return self._streaming

    @streaming.setter
    def streaming(self, value):
        self._streaming = value

    @property
    def streaming_threshold(self):
        """
        Proxy method to Streaming module's `streaming_threshold` property.
        """
        return self.streaming.streaming_threshold

    @streaming_threshold.setter
    def streaming_threshold(self, value):
        """
        Proxy method to Streaming module's `streaming_threshold` property.
        """
        self.streaming.streaming_threshold = value

    @property
    def max_trace_back(self):
        return self._max_trace_back

    @max_trace_back.setter
    def max_trace_back(self, value):
        self._max_trace_back = value

    @property
    def stream_sql(self):
        return self._stream_sql

    @stream_sql.setter
    def stream_sql(self, value):
        self._stream_sql = value