// query/expression/UnionIterator.java

// This file is part of OpenTSDB.
// Copyright (C) 2015  The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version.  This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
// General Public License for more details.  You should have received a copy
// of the GNU Lesser General Public License along with this program.  If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.query.expression;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;

import net.opentsdb.core.FillPolicy;
import net.opentsdb.core.IllegalDataException;
import net.opentsdb.core.TSDB;
import net.opentsdb.utils.ByteSet;

import org.hbase.async.HBaseClient;
import org.hbase.async.Bytes.ByteMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import sun.reflect.generics.reflectiveObjects.NotImplementedException;

/**
 * An iterator that computes the union of all series in the result sets. This
 * means we match every series with it's corresponding series in the other sets.
 * If one or more set lacks the matching series, then a {@code null} is stored
 * and when the caller iterates over the results, the need to detect the null
 * and substitute a fill value.
 * @since 2.3
 */
public class UnionIterator implements ITimeSyncedIterator, VariableIterator {
  private static final Logger LOG = LoggerFactory.getLogger(UnionIterator.class);

  /** The queries compiled and fetched from storage */
  private final Map<String, ITimeSyncedIterator> queries;

  /** A list of the current values for each series post intersection */
  private final Map<String, ExpressionDataPoint[]> current_values;

  /** A map used for single series iteration where the array is the index */
  private final Map<String, int[]> single_series_matrix;

  /** A map of the sub query index to their names for intersection computation */
  private final String[] index_to_names;

  /** Whether or not to intersect on the query tagks instead of the result set
   * tagks */
  private final boolean union_on_query_tagks;

  /** Whether or not to include the aggregated tags in the result set */
  private final boolean include_agg_tags;

  /** The start/current timestamp for the iterator in ms */
  private long timestamp;

  /** Post intersection number of time series */
  private int series_size;

  /** The ID of this iterator */
  private final String id;

  /** The index of this iterator in a list of iterators */
  private int index;

  /** The fill policy to use when a series is missing from one of the sets.
   * Default is zero. */
  private NumericFillPolicy fill_policy;

  /** A data point used for filling missing time series */
  private ExpressionDataPoint fill_dp;

  /**
   * Default ctor
   * @param id The variable ID for this iterator
   * @param results Upstream iterators
   * @param union_on_query_tagks Whether or not to flatten and join on only
   * the tags from the query or those returned in the results.
   * @param include_agg_tags Whether or not to include the flattened aggregated
   * tag keys in the join.
   */
  public UnionIterator(final String id, final Map<String, ITimeSyncedIterator> results,
      final boolean union_on_query_tagks, final boolean include_agg_tags) {
    this.id = id;
    this.union_on_query_tagks = union_on_query_tagks;
    this.include_agg_tags = include_agg_tags;
    timestamp = Long.MAX_VALUE;
    queries = new HashMap<String, ITimeSyncedIterator>(results.size());
    current_values = new HashMap<String, ExpressionDataPoint[]>(results.size());
    single_series_matrix = new HashMap<String, int[]>(results.size());
    index_to_names = new String[results.size()];
    fill_policy = new NumericFillPolicy(FillPolicy.ZERO);
    fill_dp = new ExpressionDataPoint();

    int i = 0;
    for (final Map.Entry<String, ITimeSyncedIterator> entry : results.entrySet()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adding iterator " + entry.getValue());
      }
      queries.put(entry.getKey(), entry.getValue());
      entry.getValue().setIndex(i);
      index_to_names[i] = entry.getKey();
      ++i;
    }

    computeUnion();

    // calculate the starting timestamp from the various iterators
    for (final ITimeSyncedIterator it : queries.values()) {
      final long ts = it.nextTimestamp();
      if (ts < timestamp) {
        timestamp = ts;
      }
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("Computed union: " + this);
    }
  }

  /**
   * Private copy constructor that copies references and sets up new collections
   * without copying results.
   * @param iterator The iterator to copy from.
   */
  private UnionIterator(final UnionIterator iterator) {
    id = iterator.id;
    union_on_query_tagks = iterator.union_on_query_tagks;
    include_agg_tags = iterator.include_agg_tags;
    timestamp = Long.MAX_VALUE;
    queries = new HashMap<String, ITimeSyncedIterator>(iterator.queries.size());
    current_values = new HashMap<String, ExpressionDataPoint[]>(queries.size());
    single_series_matrix = new HashMap<String, int[]>(queries.size());
    index_to_names = new String[queries.size()];
    fill_policy = iterator.fill_policy;

    int i = 0;
    for (final Map.Entry<String, ITimeSyncedIterator> entry : iterator.queries.entrySet()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adding iterator " + entry.getValue());
      }
      queries.put(entry.getKey(), entry.getValue());
      entry.getValue().setIndex(i);
      index_to_names[i] = entry.getKey();
      ++i;
    }

    computeUnion();

    // calculate the starting timestamp from the various iterators
    for (final ITimeSyncedIterator it : queries.values()) {
      final long ts = it.nextTimestamp();
      if (ts < timestamp) {
        timestamp = ts;
      }
    }
  }

  /**
   * Computes the union of all sets, matching on tags and optionally the
   * aggregated tags across each variable.
   */
  private void computeUnion() {
    // key = flattened tags, array of queries.size()
    final ByteMap<ExpressionDataPoint[]> ordered_union =
        new ByteMap<ExpressionDataPoint[]>();

    final Iterator<ITimeSyncedIterator> it = queries.values().iterator();
    while (it.hasNext()) {
      final ITimeSyncedIterator sub = it.next();
      final ExpressionDataPoint[] dps = sub.values();
      final ByteMap<Integer> local_tags = new ByteMap<Integer>();

      for (int i = 0; i < sub.size(); i++) {
        final byte[] key = flattenTags(union_on_query_tagks, include_agg_tags,
            dps[i], sub);
        local_tags.put(key, i);
        ExpressionDataPoint[] udps = ordered_union.get(key);
        if (udps == null) {
          udps = new ExpressionDataPoint[queries.size()];
          ordered_union.put(key, udps);
        }
        udps[sub.getIndex()] = dps[i];
      }
    }

    if (ordered_union.size() < 1) {
      // if no data, just stop here
      return;
    }

    setCurrentAndMeta(ordered_union);
  }

  /**
   * Takes the resulting union and builds the {@link #current_values}
   * and {@link #meta} maps.
   * @param ordered_union The union to build from.
   */
  private void setCurrentAndMeta(final ByteMap<ExpressionDataPoint[]>
      ordered_union) {
    for (final String id : queries.keySet()) {
      current_values.put(id, new ExpressionDataPoint[ordered_union.size()]);
      // TODO - blech. Fill with a sentinel value to reflect "no data here!"
      final int[] m = new int[ordered_union.size()];
      for (int i = 0; i < m.length; i++) {
        m[i] = -1;
      }
      single_series_matrix.put(id, m);
    }

    int i = 0;
    for (final Entry<byte[], ExpressionDataPoint[]> entry : ordered_union.entrySet()) {
      final ExpressionDataPoint[] idps = entry.getValue();
      for (int x = 0; x < idps.length; x++) {
        final ExpressionDataPoint[] current_dps =
            current_values.get(index_to_names[x]);
        current_dps[i] = idps[x];
        final int[] m = single_series_matrix.get(index_to_names[x]);
        if (idps[x] != null) {
          m[i] = idps[x].getIndex();
        }
      }
      ++i;
    }

    // set fills on nulls
    for (final ExpressionDataPoint[] idps : current_values.values()) {
      for (i = 0; i < idps.length; i++) {
        if (idps[i] == null) {
          idps[i] = fill_dp;
        }
      }
    }
    series_size = ordered_union.size();
  }

  /**
   * Creates a key based on the concatenation of the tag pairs then the agg
   * tag keys.
   * @param use_query_tags Whether or not to include tags returned with the
   * results or just use those group by'd in the query
   * @param include_agg_tags Whether or not to include the aggregated tags in
   * the identifier
   * @param dp The current expression data point
   * @param sub The sub query iterator
   * @return A byte array with the flattened tag keys and values. Note that
   * if the tags set is empty, this may return an empty array (but not a null
   * array)
   */
  static byte[] flattenTags(final boolean use_query_tags,
      final boolean include_agg_tags, final ExpressionDataPoint dp,
      final ITimeSyncedIterator sub) {
    if (dp.tags() == null || dp.tags().isEmpty()) {
      return HBaseClient.EMPTY_ARRAY;
    }
    final int tagk_width = TSDB.tagk_width();
    final int tagv_width = TSDB.tagv_width();

    final ByteSet query_tagks;
    // NOTE: We MAY need the agg tags but I'm not sure yet
    final int tag_size;
    if (use_query_tags) {
      int i = 0;
      if (sub.getQueryTagKs() != null && !sub.getQueryTagKs().isEmpty()) {
        query_tagks = sub.getQueryTagKs();
        for (final Map.Entry<byte[], byte[]> pair : dp.tags().entrySet()) {
          if (query_tagks.contains(pair.getKey())) {
            i++;
          }
        }
      } else {
        query_tagks = new ByteSet();
      }
      tag_size = i;
    } else {
      query_tagks = new ByteSet();
      tag_size = dp.tags().size();
    }

    final int length = (tag_size * (tagk_width + tagv_width))
        + (include_agg_tags ? (dp.aggregatedTags().size() * tagk_width) : 0);
    final byte[] key = new byte[length];
    int idx = 0;
    for (final Entry<byte[], byte[]> pair : dp.tags().entrySet()) {
      if (use_query_tags && !query_tagks.contains(pair.getKey())) {
        continue;
      }
      System.arraycopy(pair.getKey(), 0, key, idx, tagk_width);
      idx += tagk_width;
      System.arraycopy(pair.getValue(), 0, key, idx, tagv_width);
      idx += tagv_width;
    }
    if (include_agg_tags) {
      for (final byte[] tagk : dp.aggregatedTags()) {
        System.arraycopy(tagk, 0, key, idx, tagk_width);
        idx += tagk_width;
      }
    }
    return key;
  }

  @Override
  public String toString() {
    final StringBuilder buf = new StringBuilder();
    buf.append("UnionIterator(id=")
       .append(id)
       .append(", useQueryTags=")
       .append(union_on_query_tagks)
       .append(", includeAggTags=")
       .append(include_agg_tags)
       .append(", index=")
       .append(index)
       .append(", queries=")
       .append(queries);
    return buf.toString();
  }

  // Iterator implementations

  @Override
  public boolean hasNext() {
    for (final ITimeSyncedIterator sub : queries.values()) {
      if (sub.hasNext()) {
        return true;
      }
    }
    return false;
  }

  @Override
  public ExpressionDataPoint[] next(long timestamp) {
    throw new NotImplementedException();
  }

  @Override
  public long nextTimestamp() {
    long ts = Long.MAX_VALUE;
    for (final ITimeSyncedIterator sub : queries.values()) {
      if (sub != null) {
        final long t = sub.nextTimestamp();
        if (t < ts) {
          ts = t;
        }
      }
    }
    return ts;
  }

  @Override
  public int size() {
    throw new NotImplementedException();
  }

  @Override
  public ExpressionDataPoint[] values() {
    throw new NotImplementedException();
  }

  @Override
  public void nullIterator(int index) {
    throw new NotImplementedException();
  }

  @Override
  public int getIndex() {
    return index;
  }

  @Override
  public void setIndex(int index) {
    this.index = index;
  }

  @Override
  public String getId() {
    return id;
  }

  @Override
  public ByteSet getQueryTagKs() {
    throw new NotImplementedException();
  }

  @Override
  public void setFillPolicy(NumericFillPolicy policy) {
    this.fill_policy = policy;
  }

  @Override
  public NumericFillPolicy getFillPolicy() {
    return fill_policy;
  }

  @Override
  public ITimeSyncedIterator getCopy() {
    return new UnionIterator(this);
  }

  @Override
  public void next() {
    if (!hasNext()) {
      throw new IllegalDataException("No more data");
    }
    for (final ITimeSyncedIterator sub : queries.values()) {
      sub.next(timestamp);
    }
    // reset the fill data point
    fill_dp.reset(timestamp, fill_policy.getValue());
    timestamp = nextTimestamp();
  }

  @Override
  public Map<String, ExpressionDataPoint[]> getResults() {
    return current_values;
  }

  @Override
  public int getSeriesSize() {
    return series_size;
  }

  @Override
  public boolean hasNext(int index) {
    for (final Entry<String, int[]> entry : single_series_matrix.entrySet()) {
      final int idx = entry.getValue()[index];
      if (idx >= 0 && queries.get(entry.getKey()).hasNext(idx)) {
        return true;
      }
    }
    return false;
  }

  @Override
  public void next(int index) {
    if (!hasNext()) {
      throw new IllegalDataException("No more data");
    }
    for (final Entry<String, int[]> entry : single_series_matrix.entrySet()) {
      final int idx = entry.getValue()[index];
      if (idx >= 0) {
        queries.get(entry.getKey()).next(idx);
      }
    }
  }

}