Source: utils/parse.js

/**
 * Utility functions for parsing and cleaning raw CSV data into normalized
 * `meta` and `data` tables used by the Monitor class.
 *
 * These functions:
 * - Replace string `'NA'` with `null`
 * - Convert numeric fields to floats
 * - Optionally restrict metadata columns to core subsets
 * - Replace negative measurements (e.g. PM2.5) with zero
 *
 * Intended for internal use only.
 */

import * as aq from 'arquero';
const op = aq.op;

import { validateDataTable } from './helpers.js';

const FLOAT_COLUMNS = ['longitude', 'latitude', 'elevation'];

/**
 * Parses and cleans a metadata table.
 *
 * - Replaces string `'NA'` values with `null`
 * - Converts longitude, latitude, and elevation to floats
 * - Restricts output columns to `metadataNames` unless `useAllColumns` is true
 *
 * @param {aq.Table} dt - Raw Arquero table from CSV.
 * @param {boolean} [useAllColumns=false] - Whether to retain all columns.
 * @param {string[]} [metadataNames=[]] - Subset of columns to keep if `useAllColumns` is false.
 * @returns {aq.Table} Cleaned and optionally filtered metadata table.
 */
export function parseMeta(dt, useAllColumns = false, metadataNames = []) {
  const columns = dt.columnNames();
  const selectedColumns = useAllColumns ? columns : metadataNames;

  // Replace 'NA' with null
  const values1 = {};
  columns.forEach(col => {
    values1[col] = aq.escape(d => d[col] === 'NA' ? null : d[col]);
  });

  // Parse longitude, latitude, and elevation as floats (if present)
  const floatValues = {};
  FLOAT_COLUMNS.filter(col => columns.includes(col)).forEach(col => {
    floatValues[col] = aq.escape(d => parseFloat(d[col]));
  });

  return dt.derive(values1).derive(floatValues).select(selectedColumns);
}

/**
 * Parses and cleans a time-series measurement table.
 *
 * - Skips the first column (assumed to be `datetime`)
 * - Replaces string `'NA'` with `null`
 * - Converts all values to floats
 * - Replaces negative values with zero
 * - Replaces non-finite values (e.g. NaN, Infinity) with null
 * - Validates the final table using validateDataTable()
 *
 * @param {aq.Table} dt - Raw Arquero table from CSV.
 * @returns {aq.Table} Cleaned data table suitable for use in a Monitor object.
 * @throws {Error} If validation fails after cleaning.
 */
export function parseData(dt) {
  const ids = dt.columnNames().slice(1); // skip 'datetime'

  // Replace 'NA' with null, then parse as float
  const values1 = {};
  ids.forEach(id => {
    values1[id] = `d => d['${id}'] === 'NA' ? null : op.parse_float(d['${id}'])`;
  });

  // Replace negative values with zero
  const values2 = {};
  ids.forEach(id => {
    values2[id] = `d => d['${id}'] < 0 ? 0 : d['${id}']`;
  });

  // Replace non-finite values (e.g. NaN, Infinity) with null
  const values3 = {};
  ids.forEach(id => {
    values3[id] = `d => d['${id}'] != null && !op.is_finite(d['${id}']) ? null : d['${id}']`;
  });

  const cleaned = dt.derive(values1).derive(values2).derive(values3);

  // Final check: ensure datetime is valid + hourly, and all data are numeric or null
  validateDataTable(cleaned);

  return cleaned;
}