Source: helpers.js

/**
 * Helper methods for running aggregation tests
 *
 * This wraps internal functionality and provides utilities for reading and writing expected API formats
 */
import numeric from '../lib/numeric-1.2.6';
import { REGEX_EPACTS } from './constants';
import { _AggregationTest, SkatTest, ZegginiBurdenTest, VTTest, SkatOptimalTest } from './stats';

const _all_tests = [ZegginiBurdenTest, SkatTest, VTTest, SkatOptimalTest];

/**
 * Look up aggregation tests by unique name.
 *
 * This is a helper for external libraries; it provides an immutable registry of all available tests.
 * TODO would be nice to get rid of this?
 *
 *
 * {key: {label: String, constructor: Object }
 * @type {{String: {label: String, constructor: function}}}
 */
const AGGREGATION_TESTS = Object.freeze(_all_tests.reduce(function (acc, constructor) {
  const inst = new constructor();  // Hack- need instance to access attributes
  acc[inst.key] = { label: inst.label, constructor: constructor };
  return acc;
}, {}));


/**
 * Helper object for reading and interpreting variant data
 */
class PortalVariantsHelper {
  constructor(variants_array) {
    this._variants = variants_array;
    this._variant_lookup = this.parsePortalVariantData(variants_array);
  }

  get data() {  // Raw unparsed data
    return this._variants;
  }

  parsePortalVariantData(variants) {
    // Read an array of variants. Parse names into position/ref/alt, and assign altFreq to MAF.
    // Return a hash keyed on variant ID for quick lookups.
    let lookup = {};
    variants.forEach((data) => {
      let { variant, altFreq, pvalue, score } = data;
      let [_, chrom, pos, ref, alt, __] = variant.match(REGEX_EPACTS);  // eslint-disable-line no-unused-vars

      let effectFreq = altFreq;
      let effect = alt;

      /**
       * The variant's score statistic in the API is coded toward the alternate allele.
       * However, we want the effect coded towards the minor allele, since most rare variant tests assume
       * you are counting the rare/minor allele.
       */
      if (altFreq > 0.5) {
        /**
         * The effect allele is initially the alt allele. Since we're flipping it,
         * the "other" allele is the reference allele.
         */
        score = -score;
        effect = ref;

        // This is also now the minor allele frequency.
        effectFreq = 1 - altFreq;
      }

      lookup[variant] = {
        variant,
        chrom,
        pos,
        pvalue,
        score,
        altAllele: alt,
        effectAllele: effect,
        altFreq: altFreq,
        effectFreq: effectFreq,
      };
    });
    return lookup;
  }

  isAltEffect(variant_names) {  // Some calculations are sensitive to whether alt is the minor (effect) allele
    return variant_names.map((name) => {
      const variant_data = this._variant_lookup[name];
      return variant_data.altAllele === variant_data.effectAllele;
    });
  }

  getEffectFreq(variant_names) {
    // Get the allele freq for the minor (effect) allele
    return variant_names.map((name) => this._variant_lookup[name].effectFreq);
  }

  getScores(variant_names) {
    // Get single-variant scores
    return variant_names.map((name) => this._variant_lookup[name].score);
  }

  getGroupVariants(variant_names) {
    // Return all that is known about a given set of variants
    return variant_names.map((name) => this._variant_lookup[name]);
  }
}

// Utility class. Provides helper methods to access information about groups and generate subsets
class PortalGroupHelper {
  constructor(groups) {
    this._groups = groups;
    this._lookup = this._generateLookup(groups);
  }

  get data() {  // Raw unparsed data
    return this._groups;
  }

  byMask(selection) {  // str or array
    // Get all groups that identify as a specific category of mask- "limit the analysis to loss of function variants
    // in any gene"
    if (!Array.isArray(selection)) {
      selection = [selection];
    }
    selection = new Set(selection);

    const subset = this._groups.filter((group) => selection.has(group.mask));
    return new this.constructor(subset);
  }

  byGroup(selection) {  // str or array
    // Get all groups based on a specific group name, regardless of mask. Eg, "all the ways to analyze data for a
    // given gene".
    if (!Array.isArray(selection)) {
      selection = [selection];
    }
    selection = new Set(selection);

    const subset = this._groups.filter((group) => selection.has(group.group));
    return new this.constructor(subset);
  }

  _generateLookup(groups) {
    // We don't transform data, so this is a simple name -> position mapping
    return groups.reduce((acc, item, idx) => {
      const key = this._getKey(item.mask, item.group);
      acc[key] = idx;
      return acc;
    }, {});
  }

  _getKey(mask_name, group_name) {
    return `${mask_name},${group_name}`;
  }

  getOne(mask_name, group_name) {
    // Get a single group that is fully and uniquely identified by group + mask
    const key = this._getKey(mask_name, group_name);
    const pos = this._lookup[key];
    return this._groups[pos];
  }

  makeCovarianceMatrix(group, is_alt_effect) {
    // Helper method that expands the portal covariance format into a full matrix.
    // Load the covariance matrix from the response JSON
    const n_variants = group.variants.length;
    let covmat = new Array(n_variants);
    for (let i = 0; i < n_variants; i++) {
      covmat[i] = new Array(n_variants).fill(null);
    }

    let c = 0;
    for (let i = 0; i < n_variants; i++) {
      for (let j = i; j < n_variants; j++) {
        let v = group.covariance[c];
        let iAlt = is_alt_effect[i];
        let jAlt = is_alt_effect[j];

        /**
         * The API spec codes variant genotypes towards the alt allele. If the alt allele frequency
         * is > 0.5, that means we're not counting towards the minor (rare) allele, and we need to flip it around.
         * We don't flip when i == j because that element represents the variance of the variant's score, which is
         * invariant to which allele we code towards (but covariance is not.)
         *
         * We also don't flip when both the i variant and j variant need to be flipped (the ^ is XOR) because it would
         * just cancel out.
         */
        if (i !== j) {
          if ((!iAlt) ^ (!jAlt)) {
            v = -v;
          }
        }

        covmat[i][j] = v;
        covmat[j][i] = v;

        c += 1;
      }
    }

    covmat = numeric.mul(group.nSamples, covmat);
    return covmat;
  }
}

/**
 * Run one or more burden tests. This will operate in sequence: all specified tests on all specified masks
 *
 * The actual call signature of a burden test is pretty low-level. In addition to running the list of tests,
 *  this helper also restructures human-friendly mask and variant representations into a shape that works directly
 *  with the calculation.
 */
class PortalTestRunner {
  /**
   * Create a test runner object, using group and variant data of the form provided by `parsePortalJSON`. Generally,
   *  this helper is a convenience wrapper based on the raremetal.js API format spec, and hence it expects
   *  variant and group definitions to follow that spec.
   * @param groups PortalGroupHelper
   * @param variants PortalVariantsHelper
   * @param test_names {String[]|_AggregationTest[]}
   */
  constructor(groups, variants, test_names = []) {
    this.groups = groups;
    this.variants = variants;
    this._tests = [];

    test_names.forEach((name) => this.addTest(name));
  }

  /**
   *
   * @param test {String|_AggregationTest}
   * @return {_AggregationTest}
   */
  addTest(test) {
    // Add a new test by name, or directly from an instance
    if (typeof test === 'string') {
      let type = AGGREGATION_TESTS[test];
      if (!type) {
        throw new Error(`Cannot make unknown test type: ${test}`);
      }
      test = new type.constructor();
    } else if (!(test instanceof _AggregationTest)) {
      throw new Error('Must specify test as name or instance');
    }
    this._tests.push(test);
    return test;
  }

  /**
   * Run every test on every group in the container and return results
   * @returns Promise A promise representing the fulfillment state of all tests being run
   */
  run() {
    let partials = [];

    this._tests.forEach((test) => {
      this.groups.data.forEach((group) => {
        partials.push(this._runOne.bind(this, test, group));
      });
    });
    // Despite the async syntax, ensure that each tests is run in series, to mitigate memory allocation errors when
    //  running many tests
    return partials.reduce((results, one_test) => {
      return results.then((all_prior) => {
        return one_test().then((one_res) => {
          return [...all_prior, one_res];
        });
      });
    }, Promise.resolve([]));
  }

  /**
   *
   * @param {AggregationTest} test Instance for a single unit test
   * @param group {Object} Data corresponding to a specific group, following API format docs
   * @returns {{groupType: *, stat: *, test: *, pvalue: *, variants: (*|Array|string[]|Map), group: *, mask: (*|string|SVGMaskElement|string)}}
   * @private
   */
  _runOne(test, group) {
    // Helper method that translates portal data into the format expected by a test.
    const variants = group.variants;
    const scores = this.variants.getScores(variants);

    // Most calculations will require adjusting API data to ensure that minor allele is the effect allele
    const isAltEffect = this.variants.isAltEffect(variants);

    const cov = this.groups.makeCovarianceMatrix(group, isAltEffect);
    const mafs = this.variants.getEffectFreq(variants);
    let weights;  // TODO: The runner never actually uses the weights argument. Should it allow this?

    // Some test classes may return a raw value and others will return a promise. Wrap the result for consistency.
    let result = test.run(scores, cov, weights, mafs);
    return Promise.resolve(result)
      .then(([stat, pvalue, effect, se]) => {
        // The results describe the group + several new fields for calculation results.
        return {
          groupType: group.groupType,
          group: group.group,
          mask: group.mask,
          variants: group.variants,

          test: test.key,
          stat,
          pvalue,
          effect,
          se,
        };
      });
  }

  /**
   * Generate a JSON representation of the results. Returns a Promise, because some methods may run asynchronously
   *  (eg via web workers), or require loading external libraries (eg webassembly)
   * @param results Array
   * @returns {Promise<{data: {groups: Promise<any> | Array, variants: *}} | never>}
   */
  toJSON(results) {
    // Output calculation results in a format that matches the "precomputed results" endpoint
    // By passing in an argument, user can format any set of results (even combining multiple runs)
    if (!results) {
      results = this.run();
    } else {
      results = Promise.resolve(results);
    }

    return results.then((group_results) => {
      return {
        data: {
          variants: this.variants.data,
          groups: group_results,
        },
      };
    });
  }
}


function parsePortalJSON(json) {
  const data = json.data || json;
  const groups = new PortalGroupHelper(data.groups.map((item) => {
    // Each group should have access to fields that, in portal json, are defined once globally
    item.nSamples = data.nSamples;
    item.sigmaSquared = data.sigmaSquared;
    return item;
  }));
  const variants = new PortalVariantsHelper(data.variants);
  return [groups, variants];
}

export { PortalVariantsHelper as _PortalVariantsHelper, PortalGroupHelper as _PortalGroupHelper }; // testing only

export { parsePortalJSON, PortalTestRunner, AGGREGATION_TESTS };