/**
 * @license
 * Copyright 2024 Google LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import {Injectable} from '@angular/core';
import * as Papa from 'papaparse';
import {Observable, of as observableOf, throwError} from 'rxjs';
import {catchError, map, switchMap} from 'rxjs/operators';

/**
 * Result of parsing a CSV file with `FileReaderService.readCSV`.
 */
interface ParsedCsv {
  /** Names of the extracted columns, in the order they appear in the file. */
  header: string[];
  /** One record per parsed data row, keyed by the column names in `header`. */
  dataSource: Array<Record<string, string | number>>;
}

/**
 * A file reader service that extracts data from user-selected CSV and JSON
 * files.
 *
 * Every public method returns a cold observable: the file is read once per
 * subscription, and failures are delivered through the error channel.
 */
@Injectable({providedIn: 'root'})
export class FileReaderService {
  private static readonly ADDITIONAL_COLUMN_START_INDEX = 2;
  private static readonly CONTROL_GROUP = 'Control'; // Reserved name for control group
  private static readonly GROUP_KEY = 'Groups'; // Column name for groups
  private static readonly MAX_NUM_TEST_GROUPS = 9;
  private static readonly TIMESTAMP_COLUMN_START_INDEX = 1;
  // Matches test group names such as 'Test_1', 'Test_12'.
  private static readonly TEST_GROUP_PATTERN = /^Test_[0-9]+$/;
  // Matches ISO 8601 UTC timestamps (YYYY-MM-DDTHH:MM:SS[.sss]Z).
  private static readonly ISO_UTC_PATTERN =
    /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?Z$/;

  /**
   * Reads the first non-empty row from the given file.
   *
   * @param file The file from which rows are read.
   * @return An observable emitting the first row as an array of strings. It
   *     errors when the file contains no rows or cannot be read.
   */
  readFirstRow(file: File): Observable<string[]> {
    return new Observable((observer) => {
      Papa.parse(file, {
        complete: (result) => {
          if (result.data.length > 0) {
            observer.next(result.data[0] as string[]);
            observer.complete();
          } else {
            observer.error(new Error('No data found in the CSV file'));
          }
        },
        // Without this callback a read failure would never reach the
        // subscriber and the observable would hang forever.
        error: (error) => observer.error(error),
        header: false,
        skipEmptyLines: true,
      });
    });
  }

  /**
   * Gets the additional columns defined in the configuration for splitting.
   * These are the trimmed cells of the first row, starting at index
   * ADDITIONAL_COLUMN_START_INDEX.
   *
   * @param file The file from which rows are read.
   * @return An observable emitting the additional
   *     columns as an array of strings.
   */
  getConfigAdditionalColumns(file: File): Observable<string[]> {
    return this.readFirstRow(file).pipe(
      map((row) =>
        row
          .slice(FileReaderService.ADDITIONAL_COLUMN_START_INDEX)
          .map((ele) => ele.trim()),
      ),
    );
  }

  /**
   * Extracts the test group names stored in the first column of the aggregated
   * KPI table. The test groups should have names other than 'Control'.
   *
   * @param file The file from which test groups are extracted.
   * @param sep The separator in the input CSV file. The default
   *     value is comma.
   * @param singleControlGroupMode Whether to use single control group mode
   *  or mode with several control groups/covariates.
   *  Example of expected CSV format for single control group mode :
   *  Groups,    2022-01-01, 2022-01-02, 2022-01-03
   *  Control,          4.0,        2.0,        3.0
   *  Test_1,           3.0,        3.0,        2.0
   *  Test_2,           5.0,        5.0,        5.0
   *  Example of expected CSV format for multi-control group mode :
   *  Groups,    2022-01-01, 2022-01-02, 2022-01-03
   *  Test_1,           3.0,        3.0,        2.0
   *  Test_2,           5.0,        5.0,        5.0
   *  Covar_1,          4.0,        2.0,        3.0
   *  Covar_2,          3.0,        5.0,        1.0
   * @return An observable emitting the test group names as an array of
   *     strings. It errors with the first validation failure found, or when
   *     the file cannot be read.
   */
  extractAllTestGroups(
    file: File,
    sep = ',',
    singleControlGroupMode = true,
  ): Observable<string[]> {
    return this.readLines(file).pipe(
      // An exception thrown inside map() is delivered as an error notification.
      map((rows) =>
        FileReaderService.parseTestGroups(rows, sep, singleControlGroupMode),
      ),
    );
  }

  /**
   * Extracts all control groups/covariates names stored in the first column of
   * the aggregated KPI data. The leading run of rows named Test_1, Test_2, ...
   * (directly below the header) is skipped; every remaining row with a
   * non-empty first column is treated as a covariate/control group.
   *
   * @param file The file from which covariates are extracted.
   * @param sep The separator in the input CSV file. The default
   *     value is comma.
   * @return An observable emitting the covariates/control group names as an
   *     array of strings. It errors when a test group name appears among the
   *     covariates, when no covariate is found, or when the file cannot be
   *     read.
   */
  extractAllCovariates(file: File, sep = ','): Observable<string[]> {
    return this.readLines(file).pipe(
      map((rows) => FileReaderService.parseCovariates(rows, sep)),
    );
  }

  /**
   * Extracts the first and last sample dates from the KPI file.
   *
   * @param file The file from which the dates are extracted.
   * @return An observable emitting the first and
   *     last sample dates, taken from the header row.
   */
  extractFirstAndLastDates(file: File): Observable<[string, string]> {
    return this.readFirstRow(file).pipe(
      switchMap((columns) => {
        if (columns.length < 2) {
          return throwError(
            () => new Error('The file has less than two columns.'),
          );
        }
        const firstAndLastDate: [string, string] = [
          columns[FileReaderService.TIMESTAMP_COLUMN_START_INDEX].trim(),
          columns[columns.length - 1].trim(),
        ];
        return observableOf(firstAndLastDate);
      }),
      // Errors are forwarded to the subscriber unchanged.
      catchError((error) => throwError(() => error)),
    );
  }

  /**
   * Reads and parses the specified number of rows from the given CSV file.
   * The function assumes the input file has either CRLF or LF end-of-line (EOL)
   * character.
   * The first row is treated as the header row.
   * The number of columns to be read is specified by the argument numColumns.
   * If the value is not specified or not a positive integer, all columns
   * are extracted. The output has the following format:
   * [{'column1': 'value1', 'column2': 'value2', ...}, ...]
   * A cell that is missing from a short row is returned as an empty string.
   *
   * @param file The file from which rows are read.
   * @param numRows The number of rows excluding the header to read from the
   *     file.
   * @param numColumns The number of columns to extract from each row.
   * @return An observable emitting the extracted rows as an array of objects.
   */
  readCSV(
    file: File,
    numRows: number,
    numColumns: number | null,
  ): Observable<ParsedCsv> {
    return new Observable((observer) => {
      Papa.parse(file, {
        header: true, // Extract header.
        preview: numRows,
        skipEmptyLines: true,
        complete: (results: Papa.ParseResult<Record<string, string>>) => {
          const header = results.meta.fields ?? [];

          // All columns are extracted when numColumns is not specified or not
          // a valid positive integer.
          const numColumnsUpdated =
            typeof numColumns === 'number' &&
            Number.isInteger(numColumns) &&
            numColumns > 0
              ? numColumns
              : header.length;

          // Computed once instead of once per row.
          const selectedHeader = header.slice(0, numColumnsUpdated);

          // Filter columns in each row.
          const parsedData: Array<Record<string, string>> = results.data.map(
            (row: Record<string, unknown>) => {
              const slicedRow: Record<string, string> = {};
              for (const column of selectedHeader) {
                // Rows shorter than the header have no value for the trailing
                // columns; avoid rendering them as the string 'undefined'.
                slicedRow[column] = String(row[column] ?? '');
              }
              return slicedRow;
            },
          );

          const parsedCsv: ParsedCsv = {
            header: selectedHeader,
            dataSource: parsedData,
          };
          observer.next(parsedCsv);
          observer.complete();
        },
        error: (error) => observer.error(error),
      });
    });
  }

  /**
   * Reads and parses the JSON file. Strings in ISO 8601 UTC format are
   * converted to Date objects while parsing. The shape of the parsed value is
   * not validated.
   *
   * @param file The JSON file.
   * @return An observable emitting the parsed JSON object. It errors when the
   *     file cannot be read or does not contain valid JSON.
   */
  readJson(file: File): Observable<Array<Record<string, unknown>>> {
    return new Observable((observer) => {
      const reader = new FileReader();
      reader.onload = (e) => {
        if (
          e.target?.readyState === FileReader.DONE &&
          typeof e.target.result === 'string'
        ) {
          const contents = e.target.result;
          try {
            // Parse the JSON string with datetime converter.
            const json = JSON.parse(contents, this.convertDateString);
            observer.next(json);
          } catch (error) {
            // Parse error. The stream is terminated by the error itself.
            observer.error(error);
            return;
          }
        }
        observer.complete();
      };

      reader.onerror = () => {
        // File open error.
        observer.error(new Error('File open error.'));
      };

      reader.readAsText(file);
    });
  }

  /**
   * Reads the file as text and splits it into lines.
   *
   * @param file The file to read.
   * @return An observable emitting the lines of the file, split on CRLF or
   *     LF, without the empty trailing line produced by a final EOL.
   */
  private readLines(file: File): Observable<string[]> {
    return new Observable((observer) => {
      const reader = new FileReader();
      reader.onload = (e) => {
        const target = e.target;
        if (!target || target.readyState !== FileReader.DONE) {
          return;
        }
        if (typeof target.result !== 'string') {
          observer.error(new Error('The file could not be read as text.'));
          return;
        }

        // Split the contents either by CRLF or LF.
        const rows = target.result.split(/\r?\n/);

        // Remove the trailing row when the file ends with EOL char.
        if (rows[rows.length - 1] === '') {
          rows.pop();
        }

        observer.next(rows);
        observer.complete();
      };

      reader.onerror = (event) => {
        // Prefer the DOMException describing the failure over the bare event.
        observer.error(reader.error ?? event);
      };

      reader.readAsText(file);
    });
  }

  /**
   * Returns the trimmed first cell of a CSV row.
   */
  private static firstCell(row: string, sep: string): string {
    return (row.split(sep).at(0) ?? '').trim();
  }

  /**
   * Validates the first column of the KPI table and returns the test group
   * names.
   *
   * @param rows The lines of the file, header included.
   * @param sep The separator in the input CSV file.
   * @param singleControlGroupMode Whether a 'Control' row is expected between
   *     the header and the test groups.
   * @return The test group names (Test_1, Test_2, ...).
   * @throws Error describing the first validation failure.
   */
  private static parseTestGroups(
    rows: string[],
    sep: string,
    singleControlGroupMode: boolean,
  ): string[] {
    if (rows.length === 0) {
      throw new Error('The file is empty.');
    }

    // Validate the first row.
    const groupKey = FileReaderService.firstCell(rows[0], sep);
    if (groupKey !== FileReaderService.GROUP_KEY) {
      throw new Error(
        `The group key '${groupKey}' does not match '${FileReaderService.GROUP_KEY}'.`,
      );
    }

    // Both modes inspect the row below the header, so it must exist.
    if (rows.length < 2) {
      throw new Error('The file has no group rows below the header.');
    }

    const testGroups: string[] = [];

    if (singleControlGroupMode) {
      // Validate the second row.
      const controlGroup = FileReaderService.firstCell(rows[1], sep);
      if (controlGroup !== FileReaderService.CONTROL_GROUP) {
        throw new Error(
          `The control group name '${controlGroup}' does not match 'Control'.`,
        );
      }

      // Validate the third and subsequent rows: every one must be a test
      // group numbered consecutively from 1.
      for (let i = 2; i < rows.length; i++) {
        const firstColumn = FileReaderService.firstCell(rows[i], sep);
        const expected = `Test_${i - 1}`;
        if (firstColumn !== expected) {
          throw new Error(
            `The test group name '${firstColumn}' does not match '${expected}'.`,
          );
        }
        testGroups.push(firstColumn);
      }
    } else {
      const firstRow = FileReaderService.firstCell(rows[1], sep);
      if (firstRow !== 'Test_1') {
        throw new Error(
          `Invalid Format: 'Test_1' expected on first row, received ${firstRow}.`,
        );
      }
      // In multicontrol groups/covariates mode, 'Control' is
      // not present before Test_1, ... Test_N. The test groups end at the
      // first row that breaks the consecutive numbering.
      for (let i = 1; i < rows.length; i++) {
        const firstColumn = FileReaderService.firstCell(rows[i], sep);
        if (firstColumn !== `Test_${i}`) {
          break;
        }
        testGroups.push(firstColumn);
      }
    }

    if (testGroups.length === 0) {
      throw new Error('The test groups are empty.');
    }

    if (testGroups.length > FileReaderService.MAX_NUM_TEST_GROUPS) {
      throw new Error(
        `The number of test groups [${testGroups.length}] exceeds the size limit [${FileReaderService.MAX_NUM_TEST_GROUPS}].`,
      );
    }

    return testGroups;
  }

  /**
   * Returns the covariate/control group names that follow the test groups.
   *
   * @param rows The lines of the file, header included.
   * @param sep The separator in the input CSV file.
   * @return The covariate/control group names.
   * @throws Error when a test group name appears among the covariates or when
   *     no covariate is found.
   */
  private static parseCovariates(rows: string[], sep: string): string[] {
    // Skip the leading run of consecutively numbered test groups.
    let i = 1;
    while (
      i < rows.length &&
      FileReaderService.firstCell(rows[i], sep) === `Test_${i}`
    ) {
      i++;
    }

    // Collect the covariates/control groups.
    const covariates: string[] = [];
    for (; i < rows.length; i++) {
      const firstColumn = FileReaderService.firstCell(rows[i], sep);
      if (firstColumn.length === 0) {
        continue;
      }
      if (FileReaderService.TEST_GROUP_PATTERN.test(firstColumn)) {
        throw new Error(
          `Invalid Format: ${firstColumn} for control group/covariate.`,
        );
      }
      covariates.push(firstColumn);
    }

    if (covariates.length === 0) {
      throw new Error('The covariates/control groups are empty.');
    }

    return covariates;
  }

  /**
   * Detects strings with ISO 8601 UTC format (YYYY-MM-DDTHH:MM:SS.sssZ)
   * and converts them to Date objects.
   * Used as the reviver when a JSON string is parsed to an object. It is
   * passed to JSON.parse unbound, so it must not rely on `this`.
   *
   * @param key The property name being revived (unused).
   * @param value The parsed value of the property.
   * @return A Date for matching strings; otherwise the value unchanged.
   */
  private convertDateString(key: string, value: unknown): unknown {
    if (
      typeof value === 'string' &&
      FileReaderService.ISO_UTC_PATTERN.test(value)
    ) {
      return new Date(value);
    }
    return value;
  }
}
