/**
 * Truth Protocol - Entity Detector
 * Scans page content for public figures
 *
 * WeRAI / HumAIn Integration
 * Patent Pending: USPTO #63900179
 */

// Known public figures, grouped by category. Names are stored in their
// canonical display form; extend a category by appending to its array.
const KNOWN_ENTITIES = {
  politicians: [
    'Donald Trump', 'Joe Biden', 'Kamala Harris', 'Barack Obama',
    'Hillary Clinton', 'Bernie Sanders', 'Ron DeSantis', 'Gavin Newsom',
    'Justin Trudeau', 'Pierre Poilievre', 'Doug Ford',
    'Vladimir Putin', 'Xi Jinping', 'Volodymyr Zelensky',
    'Emmanuel Macron', 'Rishi Sunak', 'Benjamin Netanyahu'
  ],
  tech: [
    'Elon Musk', 'Mark Zuckerberg', 'Jeff Bezos', 'Tim Cook',
    'Sundar Pichai', 'Satya Nadella', 'Sam Altman', 'Jensen Huang',
    'Bill Gates', 'Larry Page', 'Sergey Brin'
  ],
  media: [
    'Tucker Carlson', 'Rachel Maddow', 'Joe Rogan', 'Sean Hannity',
    'Anderson Cooper', 'Don Lemon', 'Megyn Kelly', 'Ben Shapiro'
  ],
  business: [
    'Warren Buffett', 'Jamie Dimon', 'Larry Fink', 'Ken Griffin',
    'Ray Dalio', 'Cathie Wood', 'Michael Saylor'
  ]
};

// Every known name across all categories, de-duplicated, for quick lookup.
const ALL_ENTITIES = new Set(
  Object.values(KNOWN_ENTITIES).flat()
);

// One detection regex per known name: word-bounded, case-insensitive, global.
// The parts of a name are joined with `\s+` rather than a literal space so a
// name still matches when the text separates the words with a newline,
// indentation, repeated spaces or a non-breaking space.
// The `g` flag makes each regex stateful (lastIndex); callers that use
// exec()/test() must leave lastIndex at 0 when they are done.
const ENTITY_PATTERNS = Array.from(ALL_ENTITIES, (name) => {
  const source = name
    .trim()
    .split(/\s+/)
    // Escape regex metacharacters so every name part is matched literally.
    .map((part) => part.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('\\s+');

  return {
    name,
    pattern: new RegExp(`\\b${source}\\b`, 'gi')
  };
});

/**
 * Collects every mention of a known entity inside a piece of text.
 *
 * @param {string} text - Text to search (e.g. the content of a text node).
 * @returns {Array<{name: string, index: number, length: number, matchedText: string}>}
 *   One record per match, ordered by start offset. `name` is the canonical
 *   entity name; `matchedText` is the text exactly as it appeared.
 */
function findEntitiesInText(text) {
  const mentions = [];

  for (const { name, pattern } of ENTITY_PATTERNS) {
    // The patterns are global, so each exec() call resumes after the previous hit.
    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      const [matchedText] = hit;
      mentions.push({
        name,
        index: hit.index,
        length: matchedText.length,
        matchedText
      });
    }
    // Leave the shared regex rewound for whoever uses it next.
    pattern.lastIndex = 0;
  }

  // Order mentions by where they start in the text.
  mentions.sort((left, right) => left.index - right.index);

  return mentions;
}

/**
 * Walks the text nodes under document.body and collects those that mention
 * known entities.
 *
 * A text node is skipped when it has no parent element, when its parent is a
 * script/style/noscript/textarea/input element, when its parent carries the
 * `tp-annotated` class (already processed), or when its trimmed text is
 * shorter than 3 characters.
 *
 * @returns {Array<{node: Node, entities: Array}>} One entry per text node with
 *   at least one match; `entities` is the result of findEntitiesInText for
 *   that node's text. Empty when the document has no body yet.
 */
function scanDocument() {
  // document.body is null when no <body> exists (e.g. the script runs before
  // the body is parsed); createTreeWalker would throw on a null root.
  const root = document.body;
  if (!root) {
    return [];
  }

  // Built once per scan instead of once per visited text node.
  const skippedTags = new Set(['script', 'style', 'noscript', 'textarea', 'input']);

  const walker = document.createTreeWalker(
    root,
    NodeFilter.SHOW_TEXT,
    {
      acceptNode(node) {
        const parent = node.parentElement;
        if (!parent) {
          return NodeFilter.FILTER_REJECT;
        }

        // Skip non-content and form elements.
        if (skippedTags.has(parent.tagName.toLowerCase())) {
          return NodeFilter.FILTER_REJECT;
        }

        // Skip text whose parent already carries our annotation.
        if (parent.classList.contains('tp-annotated')) {
          return NodeFilter.FILTER_REJECT;
        }

        // Skip very short text.
        if (node.textContent.trim().length < 3) {
          return NodeFilter.FILTER_REJECT;
        }

        return NodeFilter.FILTER_ACCEPT;
      }
    }
  );

  const nodesToProcess = [];

  for (let node = walker.nextNode(); node; node = walker.nextNode()) {
    const entities = findEntitiesInText(node.textContent);
    if (entities.length > 0) {
      nodesToProcess.push({ node, entities });
    }
  }

  return nodesToProcess;
}

/**
 * Looks up which category a known entity belongs to.
 *
 * @param {string} name - Entity name, spelled exactly as in KNOWN_ENTITIES.
 * @returns {string} The first category key whose list contains `name`,
 *   or 'unknown' when no category lists it.
 */
function getEntityCategory(name) {
  const owner = Object.entries(KNOWN_ENTITIES).find(
    ([, members]) => members.includes(name)
  );

  if (owner === undefined) {
    return 'unknown';
  }

  const [category] = owner;
  return category;
}

// Publish the detector API on the shared TruthProtocol namespace so the
// annotator can reach it; the namespace is created only if it is missing.
if (!window.TruthProtocol) {
  window.TruthProtocol = {};
}

window.TruthProtocol.detector = {
  scanDocument,
  findEntitiesInText,
  getEntityCategory,
  KNOWN_ENTITIES,
  ALL_ENTITIES
};
