-- ============================================================================
--  Rainbow Medicine — Website Chatbot
--  MySQL 8.x / MariaDB 10.4+ schema
--  Every column nullable so partial saves never fail.
-- ============================================================================

-- Shared database — also hosts the intake form (intake_*) and the
-- Health Therapies Clinics bot (htc_*). Our tables use the `chatbot_` prefix.
CREATE DATABASE IF NOT EXISTS clinics_db
  CHARACTER SET utf8mb4
  COLLATE utf8mb4_unicode_ci;

USE clinics_db;

-- ----------------------------------------------------------------------------
--  Conversations / sessions — one row per browser visitor conversation.
-- ----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS chatbot_sessions (
  id              BIGINT UNSIGNED  NOT NULL AUTO_INCREMENT,
  token           CHAR(36)         NULL,            -- public UUID, stored in localStorage
  page_url        VARCHAR(500)     NULL,            -- URL where the chat started
  referrer        VARCHAR(500)     NULL,
  ip_address      VARCHAR(64)      NULL,
  user_agent      VARCHAR(255)     NULL,
  language        VARCHAR(10)      NULL,            -- en | es | zh
  -- Optional lead-capture fields (filled later if user provides them)
  lead_name       VARCHAR(160)     NULL,
  lead_email      VARCHAR(160)     NULL,
  lead_phone      VARCHAR(40)      NULL,
  lead_topic      VARCHAR(120)     NULL,            -- e.g. "Fertility", "Cupping"
  lead_captured_at DATETIME        NULL,
  -- Derived stats
  message_count   INT UNSIGNED     NULL DEFAULT 0,
  total_tokens    INT UNSIGNED     NULL DEFAULT 0,
  status          VARCHAR(20)      NULL DEFAULT 'open',  -- open | closed | escalated
  started_at      DATETIME         NULL DEFAULT CURRENT_TIMESTAMP,
  last_message_at DATETIME         NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (id),
  UNIQUE KEY uk_token (token),
  KEY idx_lead_email (lead_email),
  KEY idx_started (started_at)
) ENGINE=InnoDB;

-- ----------------------------------------------------------------------------
--  Messages — every turn (user + assistant) for transcript / audit.
-- ----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS chatbot_messages (
  id              BIGINT UNSIGNED  NOT NULL AUTO_INCREMENT,
  session_id      BIGINT UNSIGNED  NULL,
  role            VARCHAR(20)      NULL,            -- user | assistant | system
  content         MEDIUMTEXT       NULL,
  tokens_in       INT UNSIGNED     NULL,
  tokens_out      INT UNSIGNED     NULL,
  model           VARCHAR(60)      NULL,
  latency_ms      INT UNSIGNED     NULL,
  retrieved_chunk_ids VARCHAR(255) NULL,            -- comma-separated KB chunk ids used as context
  created_at      DATETIME         NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (id),
  KEY idx_session (session_id, created_at),
  CONSTRAINT fk_msg_session FOREIGN KEY (session_id)
    REFERENCES chatbot_sessions(id) ON DELETE CASCADE
) ENGINE=InnoDB;

-- ----------------------------------------------------------------------------
--  Knowledge base — source documents (one per scraped page or uploaded file).
-- ----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS chatbot_kb_documents (
  id              BIGINT UNSIGNED  NOT NULL AUTO_INCREMENT,
  source_type     VARCHAR(40)      NULL,            -- 'site' | 'pdf' | 'docx' | 'manual'
  source_url      VARCHAR(500)     NULL,            -- full URL or local file path
  title           VARCHAR(255)     NULL,
  language        VARCHAR(10)      NULL DEFAULT 'en',
  content_hash    CHAR(64)         NULL,            -- sha256 of cleaned content (for dedupe)
  raw_content     MEDIUMTEXT       NULL,            -- full extracted text
  word_count      INT UNSIGNED     NULL,
  is_active       TINYINT(1)       NULL DEFAULT 1,
  ingested_at     DATETIME         NULL DEFAULT CURRENT_TIMESTAMP,
  updated_at      DATETIME         NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
  PRIMARY KEY (id),
  UNIQUE KEY uk_hash (content_hash),
  KEY idx_active (is_active)
) ENGINE=InnoDB;

-- ----------------------------------------------------------------------------
--  Knowledge base — chunks (split docs for retrieval).
--  FULLTEXT index lets us do MATCH ... AGAINST keyword search without
--  needing a vector DB. Good enough for FAQ-style retrieval.
-- ----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS chatbot_kb_chunks (
  id              BIGINT UNSIGNED  NOT NULL AUTO_INCREMENT,
  document_id     BIGINT UNSIGNED  NULL,
  chunk_index     INT UNSIGNED     NULL,
  chunk_text      TEXT             NULL,
  word_count      INT UNSIGNED     NULL,
  PRIMARY KEY (id),
  KEY idx_doc (document_id, chunk_index),
  FULLTEXT KEY ft_chunk (chunk_text),
  CONSTRAINT fk_chunk_doc FOREIGN KEY (document_id)
    REFERENCES chatbot_kb_documents(id) ON DELETE CASCADE
) ENGINE=InnoDB;

-- ----------------------------------------------------------------------------
--  Optional FAQ override — manually curated Q&A pairs that should always
--  win over generated answers when matched.
-- ----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS chatbot_faq (
  id              BIGINT UNSIGNED  NOT NULL AUTO_INCREMENT,
  question        VARCHAR(500)     NULL,
  answer          TEXT             NULL,
  keywords        VARCHAR(500)     NULL,            -- comma-separated trigger keywords
  language        VARCHAR(10)      NULL DEFAULT 'en',
  is_active       TINYINT(1)       NULL DEFAULT 1,
  created_at      DATETIME         NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (id),
  FULLTEXT KEY ft_faq (question, keywords)
) ENGINE=InnoDB;

-- ----------------------------------------------------------------------------
--  Lead notifications — to be picked up by an email cron.
-- ----------------------------------------------------------------------------
CREATE TABLE IF NOT EXISTS chatbot_lead_outbox (
  id              BIGINT UNSIGNED  NOT NULL AUTO_INCREMENT,
  session_id      BIGINT UNSIGNED  NULL,
  payload         TEXT             NULL,            -- JSON snapshot of the lead
  sent_at         DATETIME         NULL,
  created_at      DATETIME         NULL DEFAULT CURRENT_TIMESTAMP,
  PRIMARY KEY (id),
  KEY idx_unsent (sent_at)
) ENGINE=InnoDB;

-- ----------------------------------------------------------------------------
--  Helpful views
-- ----------------------------------------------------------------------------
CREATE OR REPLACE VIEW v_chatbot_recent_sessions AS
SELECT s.id, s.token, s.lead_name, s.lead_email, s.lead_topic,
       s.message_count, s.total_tokens, s.status,
       s.started_at, s.last_message_at, s.page_url
FROM chatbot_sessions s
ORDER BY s.started_at DESC;

CREATE OR REPLACE VIEW v_chatbot_kb_status AS
SELECT d.id, d.title, d.source_url, d.source_type,
       d.word_count, d.is_active, d.ingested_at,
       COUNT(c.id) AS chunk_count
FROM chatbot_kb_documents d
LEFT JOIN chatbot_kb_chunks c ON c.document_id = d.id
GROUP BY d.id, d.title, d.source_url, d.source_type,
         d.word_count, d.is_active, d.ingested_at;
