/**
 * Query sources: chatbot, UI.
 * Queries are decoupled from the profile: a single context object carries everything a query
 * needs, including whatever was captured from the user profile.
 * This keeps profile-agnostic cases simple, e.g. showing a feed of sports articles.
 * e.g. the context uses `category`, not the profile's `interests`.
 */

// TODO: refactor & generalize queries for both app and chatbot.
// Test:
// Enter from a random article.
// Running multiple queries isn't too expensive.
// Let ES do the heavy lifting.
// Context: similar (current) article.
// Context: (TODO) history of articles, day, etc.

import _ from 'lodash';
import bodybuilder from 'bodybuilder';
import { search, indexDocBulk, EsQuery } from '~/adapters/elasticsearch';
import { format } from 'date-fns';
import Profile from '~/domain/wordquest/profile/profile';
import Article, { AnalyzedArticle } from '~/domain/wordquest/article';
import striptags from 'striptags';
import rootLogger from '~/app/logger';
import { Entity } from '~/domain/wordquest/entity';
import {
  DataEvent,
  ArticleUpsertedDataEvent
} from '~/domain/wordquest/event/data-event';
import { DEFAULT_DIFFICULTY_LEVEL } from '~/domain/wordquest/ql/difficulty-scale';
import { isNotEmptyArray } from '~/utils';
import { Locale, BASE_LOCALE, SUPPORTED_LOCALES } from '@wordquest/locales';
import {
  ES_INDEX_BY_ENTITY_LOCALE,
  ES_CONFIG
} from '~/app/elasticsearch.config';
import {
  BaseQueryContext,
  withIncludeIdsFilter,
  withNotFilter,
  withExcludeIdsFilter,
  withPaging,
  withMustFilter
} from '~/app/repo-es-util';
import { range, interval, fromEvent, of, pipe, from, zip } from 'rxjs';
import {
  take,
  filter,
  mergeAll,
  toArray,
  flatMap,
  distinct,
  tap,
  map,
  delay,
  takeUntil
} from 'rxjs/operators';
import { articleTimeRangeStart } from './time-range';
import { Category } from '~/domain/wordquest/category';

// Article entry of the entity/locale index table; looked up by locale wherever this module
// needs an index (search target, bulk indexing target, `_index` of more_like_this documents).
export const ES_INDEX_BY_LOCALE = ES_INDEX_BY_ENTITY_LOCALE[Entity.Article];
// Child logger tagged with this module's name.
const logger = rootLogger.child({ module: 'article-repo' });
// Document type handed to bulk indexing.
// NOTE(review): the original "TODO migrate as _doc" looks stale - the value already is '_doc'.
const INDEX_TYPE = '_doc';

// FT search: title, content

/**
 * Creates the field mapping used for an article index.
 * A new object is returned on every call so that locales never share a mapping instance.
 */
const createArticleMapping = () => ({
  // `store` cannot be specified here; the values are kept in _source anyway.
  // https://www.elastic.co/guide/en/elasticsearch/reference/6.3/object.html
  properties: {
    author: {
      properties: {
        name: { type: 'text' }
      }
    },
    // `enabled: false`: the object is kept in _source but not parsed or indexed.
    tokenInfoByToken: { enabled: false },
    analyzedAt: { type: 'date', ignore_malformed: true },
    publishedAt: { type: 'date', ignore_malformed: true },
    difficultyLevel: { type: 'short', null_value: DEFAULT_DIFFICULTY_LEVEL },
    qualityScore: { type: 'short', null_value: 100 }
  }
});

/** Article index mapping for every supported locale, keyed by locale. */
export const ES_MAPPING_ARTICLE_BY_LOCALE = _.fromPairs(
  SUPPORTED_LOCALES.map((sourceLocale) => [
    sourceLocale,
    createArticleMapping()
  ])
);

/**
 * Shape of an article document as handled by this repo: every `AnalyzedArticle` field plus
 * the Elasticsearch `_id`.
 */
// override with fields
export type ArticleEsDoc = {
  _id: string;
} & AnalyzedArticle;
// contentSantized: string

/**
 * Options understood by the article query builders in this module, on top of the shared
 * `BaseQueryContext` options.
 */
export type ArticleQueryContext = BaseQueryContext & {
  // Controls which `_source` fields buildArticleQuery requests.
  fields?: {
    // Request only `id`; checked first, so it wins over isWithTokenInfoByToken.
    isIdOnly?: boolean; // takes priority
    // Additionally request `tokenInfoByToken` on top of the default fields.
    isWithTokenInfoByToken?: boolean;
  };
  // just cascade
  // In buildArticleQuery each entry whose value passes `isNotEmptyArray` is turned into
  // `match` orFilters (boost 3.0) on the field named by the key.
  // NOTE(review): `tagKeys` is required while its siblings are optional, yet
  // createQueryContextWithProfile only sets `category` - confirm whether it should be optional.
  terms?: {
    title?: string;
    content?: string;
    category?: Category[];
    tagKeys: string[];
  };
  // When a boolean, buildArticleQuery adds a `term` orFilter on `isAnalyzed`; left undefined,
  // no such clause is added.
  isAnalyzed?: boolean;
  // When falsy, buildArticleQuery also adds a low-boost (0.5) match on 'others' per term key.
  isTermsAMust?: boolean;
  // Seeded from `profile.difficultyTarget` by createQueryContextWithProfile. It currently does
  // not affect the built query: the scoring that would consume it is commented out.
  difficultyLevel?: number;
  // When non-empty, buildArticleQuery builds a `more_like_this` query around this article
  // instead of the regular query.
  similarArticle?: {
    id: string;
    category: string;
    sourceName: string;
  };
};

/**
 * Progressive loading:
 * Avoid loading tokenInfoByToken until entering read-article, for both speed and memory
 * concerns (e.g. chatbot context), but pre-load content for UX in the app.
 * Ignore platform limitations (e.g. for the chatbot, use context queries).
 */

// `_source` fields buildArticleQuery requests by default on its regular (non more_like_this)
// path. `tokenInfoByToken` is deliberately absent; it is only appended when the context sets
// `fields.isWithTokenInfoByToken`.
const DEFAULT_SOURCE_FIELDS = [
  'title',
  'sourceName',
  'semanticKey',
  'category',
  'content',
  'contentTagged',
  'publishedAt',
  'difficultyLevel',
  'qualityScore',
  'thumbnailUrl',
  'originalUrl',
  'tagKeys'
];

/**
 * Creates a query context seeded from a user profile, meant for composition: callers merge
 * further contexts on top and are responsible for the override order.
 *
 * - `difficultyLevel` is set when `profile.difficultyTarget` is a number.
 * - `terms.category` is set when `profile.interests` is not empty.
 *
 * @param profile Profile to read `difficultyTarget` and `interests` from.
 * @returns A new context holding only the values found on the profile.
 */
export function createQueryContextWithProfile(
  profile: Profile
): ArticleQueryContext {
  const seeded: ArticleQueryContext = {};

  const { difficultyTarget } = profile;
  if (_.isNumber(difficultyTarget)) {
    _.merge(seeded, { difficultyLevel: difficultyTarget });
  }

  // Nothing else to contribute without interests.
  if (_.isEmpty(profile.interests)) {
    return seeded;
  }

  _.merge(seeded, {
    terms: {
      category: profile.interests as Category[]
    }
  });

  return seeded;
}

/**
 * Builds a `more_like_this` query around `context.similarArticle`.
 *
 * Main goal is to increase CTR & session length. Time is ignored for now.
 * It is hard to merge a more_like_this query with the normal query, hence the separate builder.
 *
 * The liked document is referenced by index + id, compared on `title` and `content`.
 * `withNotFilter(context)` is applied, then `term` orFilters on the article's `sourceName`
 * and `category`, and results are sorted by `publishedAt` then `qualityScore`, both descending.
 *
 * NOTE(review): this path sets neither `_source` nor paging, unlike the regular path of
 * buildArticleQuery - confirm that is intended.
 *
 * @param locale  Selects the index that holds the liked document.
 * @param context Query context; `similarArticle` must be set.
 * @returns The built query body.
 * @throws Error when `context.similarArticle` is missing.
 */
function buildQueryArticlesWithSimilarArticleContext(
  locale: Locale,
  context: ArticleQueryContext
): EsQuery {
  const { similarArticle } = context;
  if (!similarArticle) {
    throw new Error('No similar Article');
  }

  const moreLikeThis = bodybuilder().query('more_like_this', {
    fields: ['title', 'content'],
    like: [
      {
        _index: ES_INDEX_BY_LOCALE[locale],
        _id: similarArticle.id
      }
    ],
    min_term_freq: 1,
    max_query_terms: 12
  });

  // `context` is guaranteed to be set here (it was destructured and validated above), so the
  // filters and sort are applied unconditionally.
  return withNotFilter(context)(moreLikeThis)
    .orFilter('term', 'sourceName', similarArticle.sourceName)
    .orFilter('term', 'category', similarArticle.category)
    .sort([
      {
        publishedAt: {
          order: 'desc'
        }
      },
      {
        qualityScore: {
          order: 'desc'
        }
      }
    ])
    .build();
}

/**
 * Picks the `_source` fields for the regular article query.
 * `isIdOnly` wins over `isWithTokenInfoByToken`; with neither set the defaults are used.
 */
function resolveSourceFields(fields: ArticleQueryContext['fields']): string[] {
  const { isIdOnly, isWithTokenInfoByToken } = fields || {};
  if (isIdOnly) {
    return ['id'];
  }
  if (isWithTokenInfoByToken) {
    return DEFAULT_SOURCE_FIELDS.concat(['tokenInfoByToken']);
  }

  return DEFAULT_SOURCE_FIELDS;
}

/**
 * Builds the Elasticsearch query body for listing articles.
 *
 * When `context.similarArticle` is non-empty the whole query is delegated to the
 * `more_like_this` builder and none of the steps below apply.
 *
 * Otherwise the body is assembled in this order:
 * 1. `_source` is restricted (see `context.fields`).
 * 2. A boolean `context.isAnalyzed` adds a `term` orFilter on `isAnalyzed`.
 * 3. A non-empty `context.filter.includeIds` is applied through `withIncludeIdsFilter`;
 *    otherwise, unless `context.isIgnorePublishedAt` is set, a `publishedAt` range query
 *    (`gt: articleTimeRangeStart()`) is added.
 * 4. `withExcludeIdsFilter` is applied.
 * 5. Every `context.terms` entry whose value passes `isNotEmptyArray` adds one `match`
 *    orFilter per item (boost 3.0) and, unless `context.isTermsAMust`, a fallback match on
 *    'others' (boost 0.5).
 * 6. `withMustFilter`, then `withPaging`.
 *
 * NOTE(review): `isAnalyzed` is added with orFilter just like the term boosts, so it may act
 * as one alternative among them rather than a strict constraint - confirm against
 * bodybuilder's bool-clause semantics.
 *
 * @param locale  Only used when delegating to the similar-article builder.
 * @param context Query options.
 * @returns The built query body.
 */
export function buildArticleQuery(
  locale: Locale,
  context: ArticleQueryContext
): EsQuery {
  if (!_.isEmpty(context.similarArticle)) {
    return buildQueryArticlesWithSimilarArticleContext(locale, context);
  }

  // For perf, we want to determine ideal articles at query time.
  // Although it is possible to query content length, we avoid playing around with scripts and
  // use a cached field for decay instead.
  // Other fields requiring pre-processing: words difficulty.
  // Also: not to be confused with span query, which is for in-text proximity.
  // To query for a field's (content) length, use a range query with boost / a script query.
  // https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html

  let body = bodybuilder();
  // Disabled scoring sketch. Re-enabling it needs a `difficultyLevel` value again, e.g.
  // `context.difficultyLevel` falling back to DEFAULT_DIFFICULTY_LEVEL.
  // .query('function_score', {
  //   // multiply=query score and function score is multiplied (default)
  //   // replace=only function score is used, the query score is ignored
  //   // score_mode: 'multiply',
  //   functions: [
  //     {
  //       weight: 1,
  //       field_value_factor: {
  //         field: 'qualityScore',
  //         factor: 0.1,
  //         modifier: 'none',
  //         missing: 0
  //       }
  //     },
  //     // index corrupted
  //     {
  //       weight: 1,
  //       gauss: {
  //         publishedAt: {
  //           origin: format(new Date(), 'yyyy-MM-dd'),
  //           scale: '1d',
  //           offset: '1d',
  //           // i.e. fair for articles today
  //           // aggressive decay to avoid repeated articles
  //           decay: 0.2
  //         }
  //       }
  //     },
  //     {
  //       weight: 1,
  //       gauss: {
  //         difficultyLevel: {
  //           origin: difficultyLevel,
  //           scale: 30,
  //           offset: 1,
  //           decay: 0.2
  //         }
  //       }
  //     }
  //   ]
  // });
  // No point in sorting by fields which affect the score:
  // when sorting on a field, scores are not computed.
  // https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-sort.html
  // .sort([]);

  body = body.rawOption('_source', resolveSourceFields(context.fields));
  // .rawOption('explain', true);

  // No default value, so that both analyzed and non-analyzed can be queried when needed.
  const { isAnalyzed } = context;
  if (_.isBoolean(isAnalyzed)) {
    body = body.orFilter('term', 'isAnalyzed', isAnalyzed);
  }

  // By convention, explicit includeIds return exactly those documents, so the publishedAt
  // window is only applied when no ids are given.
  if (isNotEmptyArray(_.get(context, 'filter.includeIds'))) {
    body = withIncludeIdsFilter(context)(body);
  } else if (!context.isIgnorePublishedAt) {
    body = body.query('range', 'publishedAt', { gt: articleTimeRangeStart() });
  }

  body = withExcludeIdsFilter(context)(body);

  // TODO some cases we do want a strict filter instead of boosting
  const terms = context.terms || {};

  _.forEach(terms, (items, termKey) => {
    if (!isNotEmptyArray(items)) {
      return;
    }

    _.forEach(items, (item) => {
      body = body.orFilter('match', termKey, {
        query: item,
        boost: 3.0
      });
    });

    // Simple fallback for now: add 'others' for everyone unless the terms are a must.
    if (!context.isTermsAMust) {
      body = body.orFilter('match', termKey, {
        query: 'others',
        boost: 0.5
      });
    }
  });

  body = withPaging(context)(withMustFilter(context)(body));

  return body.build();
}

// export const asArticle = (obj:object) => Article.create(obj)

// TODO migrate inside @Article
/**
 * Converts a raw Elasticsearch hit into an `Article`.
 * The hit's `_id` seeds the `id` property; `_source` is merged on top of it, so an `id`
 * stored inside `_source` takes precedence.
 */
export const asArticle = (doc: {
  _id: string;
  _source: ArticleEsDoc;
}): Article => {
  const { _id, _source } = doc;
  const articleProps = _.merge({ id: _id }, _source);

  return Article.create(articleProps);
};
// {
//   contentSantized: striptags(doc._source.content)
// }

/**
 * Runs an article query against the locale's index and maps every hit to an `Article`
 * (which could be an Article or an AnalyzedArticle).
 *
 * The query comes from `buildArticleQuery`, so a context carrying `similarArticle` always
 * results in a more_like_this query.
 *
 * @param locale  Selects the index to search.
 * @param context Query options; defaults to an empty context.
 * @returns The articles created from the search hits.
 */
export function queryArticles(
  locale: Locale,
  context: ArticleQueryContext = {}
): Promise<Article[]> {
  const esQuery = buildArticleQuery(locale, context);

  logger.debug('[Debug]query & context', esQuery, context);

  const pendingSearch = search(
    ES_CONFIG.APP_CONTENT,
    ES_INDEX_BY_LOCALE[locale],
    esQuery
  );

  return pendingSearch.then((res) => {
    const hits = res.hits.hits;

    return hits.map(asArticle);
  });
}

/**
 * Maps an analyzed article to the document stored in Elasticsearch.
 * Starts from `article.toObject()` and overwrites `id`, `semanticKey`, `content` (the
 * sanitized content when available, otherwise the raw content) and `tokenInfoByToken`
 * (an empty object when the article has none).
 */
export const mapArticleAsDoc = (article: AnalyzedArticle): ArticleEsDoc => {
  const doc = article.toObject();
  const overrides = {
    id: article.id,
    semanticKey: article.semanticKey,
    content: article.contentSantized || article.content,
    tokenInfoByToken: article.tokenInfoByToken || {}
  };

  return Object.assign(doc, overrides);
};

// TODO potential for partial updates?
// Full - when words changed
// microlink?
/**
 * Maps an article-upserted data event to the document to index.
 *
 * Diagnostics go through the module logger at debug level instead of `console.log`, so bulk
 * indexing no longer prints every event to stdout or serializes the article a second time
 * just for logging.
 *
 * @param event Event carrying the article in `properties.article`.
 * @returns The document produced by `mapArticleAsDoc` for that article.
 */
export const mapArticleDataEventAsDoc = (
  event: ArticleUpsertedDataEvent
): ArticleEsDoc => {
  const { article } = event.properties;
  logger.debug('mapArticleDataEventAsDoc', event);

  return mapArticleAsDoc(article as AnalyzedArticle);
};

/**
 * Curried bulk indexer: given a locale, returns a function that maps article-upserted
 * events to documents and bulk-indexes them into that locale's article index using the
 * admin connection config, with a '4m' timeout.
 */
export const indexArticlesByLocale =
  (locale: Locale) => (events: ArticleUpsertedDataEvent[]) => {
    const docs = events.map(mapArticleDataEventAsDoc);

    return indexDocBulk(
      ES_CONFIG.APP_CONTENT_ADMIN,
      ES_INDEX_BY_LOCALE[locale],
      docs,
      {
        type: INDEX_TYPE,
        timeout: '4m'
      }
    );
  };

// TODO Supports mixing recent/ unread ones with ids from profile
/**
 * Placeholder: not implemented yet. Resolves with no value.
 *
 * Sketch of the intended result:
 *   return Object.assign(unreads, {
 *     recents:
 *       _.sortBy(
 *         // _.get(profile:Profile, 'historyArticles') || [],
 *         article => article.lastReadAt
 *       )
 *   });
 */
export const queryArticlesGroupedByCategoryAndRead = (): Promise<void> =>
  Promise.resolve();

/**
 * Queries articles and groups the results by their `category`.
 *
 * TODO: use an ES aggregation instead, in order to limit results per category.
 * https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html
 *
 * @param locale  Forwarded to `queryArticles`.
 * @param context Forwarded to `queryArticles`; defaults to an empty context.
 * @returns The queried articles keyed by category.
 */
export const queryArticleGroupedByCategory = async (
  locale: Locale,
  context: ArticleQueryContext = {}
): Promise<Record<string, AnalyzedArticle[]>> => {
  const found: AnalyzedArticle[] = await queryArticles(locale, context);
  const byCategory = _.groupBy(found, ({ category }) => category);

  return byCategory;
};

/**
 * Counts articles per category and per publication day.
 *
 * Requests a page size of 500 through `queryArticleGroupedByCategory`, then counts each
 * category's articles by `publishedAt` formatted as `yyyyMMdd` (e.g. `20200131`).
 *
 * The pattern uses the Unicode tokens `yyyy` (calendar year) and `dd` (day of month).
 * `YYYY` / `DD` mean week-numbering year / day of year, and date-fns v2+ rejects them with a
 * RangeError unless explicitly opted in.
 *
 * NOTE(review): assumes `publishedAt` is a Date (or timestamp) by the time it gets here -
 * confirm that `Article.create` parses the ISO strings coming back from Elasticsearch.
 *
 * @param locale Locale whose article index is inspected.
 * @returns For each category, a map from `yyyyMMdd` day to the number of articles.
 */
export const statsArticles = async (
  locale: Locale
): Promise<Record<string, Record<string, number>>> => {
  const articlesByCategory = await queryArticleGroupedByCategory(locale, {
    paging: {
      size: 500
    }
  });

  return _.mapValues(articlesByCategory, (articles) =>
    _.countBy(articles, (a) => format(a.publishedAt, 'yyyyMMdd'))
  );
};
