// oil-formula-calculator/frontend/src/composables/useSmartPaste.js

// Drops-per-millilitre factor (per its name), exported for use by other modules.
// NOTE(review): nothing in the visible part of this file reads this constant;
// parseOilChunk converts ml to drops with its own factor of 20 — confirm which
// value is intended.
export const DROPS_PER_ML = 18.6

/**
 * Alias table for oil names: each key is an alternative spelling (sound-alike
 * characters, typos, or a variant with an extra/missing suffix) and each value
 * is the canonical oil name it should resolve to.
 *
 * findOil looks up the whole trimmed input as a key; greedyMatchOils tries the
 * keys as prefixes at each text position when no real oil name matches there.
 */
export const OIL_HOMOPHONES = {
  '相貌':'香茅','香矛':'香茅','向茅':'香茅','像茅':'香茅',
  '如香':'乳香','儒香':'乳香',
  '古巴想':'古巴香脂','古巴香':'古巴香脂','古巴相脂':'古巴香脂',
  '博荷':'薄荷','薄河':'薄荷',
  '尤佳利':'尤加利','优加利':'尤加利',
  '依兰':'依兰依兰',
  '雪松木':'雪松',
  '桧木':'扁柏','桧柏':'扁柏',
  '永久化':'永久花','永久华':'永久花',
  '罗马洋柑菊':'罗马洋甘菊','洋甘菊':'罗马洋甘菊',
  '天竹葵':'天竺葵','天竺癸':'天竺葵',
  '没要':'没药','莫药':'没药',
  '快乐鼠尾':'快乐鼠尾草',
  '椒样博荷':'椒样薄荷','椒样薄和':'椒样薄荷',
  '丝柏木':'丝柏',
  '柠檬草油':'柠檬草',
  '茶树油':'茶树',
  '薰衣草油':'薰衣草',
  '玫瑰花':'玫瑰',
}

/**
 * Levenshtein distance: the minimum number of single-unit insertions,
 * deletions and substitutions needed to turn `a` into `b`.
 * Units are compared by index (UTF-16 code units for strings).
 *
 * @param {string} a - first string
 * @param {string} b - second string
 * @returns {number} edit distance between `a` and `b`
 */
export function editDistance(a, b) {
  const rows = a.length
  const cols = b.length
  // Only the previous row of the DP table is needed to compute the next one.
  let previous = Array.from({ length: cols + 1 }, (_, col) => col)
  for (let row = 1; row <= rows; row++) {
    const current = [row]
    for (let col = 1; col <= cols; col++) {
      const diagonal = previous[col - 1]
      current.push(
        a[row - 1] === b[col - 1]
          ? diagonal
          : 1 + Math.min(previous[col], current[col - 1], diagonal)
      )
    }
    previous = current
  }
  return previous[cols]
}

/**
 * Resolve a user-typed oil name to one of the known oil names.
 *
 * Strategies are tried in order and the first hit wins:
 *   1. alias lookup in OIL_HOMOPHONES (only if the canonical name is known)
 *   2. exact match
 *   3. containment in either direction, longest known name preferred
 *   4. the two strings differ by exactly one inserted/removed character
 *   5. smallest edit distance, for inputs of 3+ characters only
 *
 * @param {string} input - raw name as typed; surrounding whitespace is ignored
 * @param {string[]} oilNames - known oil names
 * @returns {string|null} the matched known oil name, or null when nothing fits
 */
export function findOil(input, oilNames) {
  const isBlank = !input || input.length === 0
  if (isBlank) return null
  const query = input.trim()
  if (!query) return null

  // 1. Alias table: accepted only when it points at an oil we actually know.
  const canonical = OIL_HOMOPHONES[query]
  if (canonical && oilNames.includes(canonical)) return canonical

  // 2. Exact match.
  if (oilNames.includes(query)) return query

  // 3. Containment either way; keep the longest name (earliest one on ties).
  let longestOverlap = null
  for (const name of oilNames) {
    const overlaps = name.includes(query) || query.includes(name)
    if (overlaps && (longestOverlap === null || name.length > longestOverlap.length)) {
      longestOverlap = name
    }
  }
  if (longestOverlap !== null) return longestOverlap

  // 4. One character apart: deleting a single character from the longer
  //    string yields the shorter one (works in both directions).
  const oneDeletionApart = (longer, shorter) => {
    for (let skip = 0; skip < longer.length; skip++) {
      if (longer.slice(0, skip) + longer.slice(skip + 1) === shorter) return true
    }
    return false
  }
  for (const name of oilNames) {
    const gap = name.length - query.length
    if (gap === 1 && oneDeletionApart(name, query)) return name
    if (gap === -1 && oneDeletionApart(query, name)) return name
  }

  // 5. Edit distance. Very short inputs are skipped to avoid false positives,
  //    and a candidate is accepted only if the distance is at most half of the
  //    longer string's length (rounded down).
  if (query.length < 3) return null
  let closest = null
  let closestDistance = Infinity
  for (const name of oilNames) {
    const distance = editDistance(query, name)
    const budget = Math.floor(Math.max(query.length, name.length) / 2)
    if (distance <= budget && distance < closestDistance) {
      closestDistance = distance
      closest = name
    }
  }
  return closest
}

/**
 * Scan `text` left to right and collect every known oil it mentions.
 *
 * At each position the longest oil name that starts there wins. If no oil name
 * starts there, the longest OIL_HOMOPHONES alias that starts there is used and
 * its canonical name is reported. An alias is honoured only when its canonical
 * name is present in `oilNames` (the same rule findOil applies), so the result
 * never contains a name the caller does not know. Characters that start no
 * match are skipped one at a time.
 *
 * @param {string} text - concatenated text, e.g. "薰衣草茶树"
 * @param {string[]} oilNames - known oil names
 * @returns {string[]} matched known oil names, in order of appearance
 */
export function greedyMatchOils(text, oilNames) {
  // Sorted once up front (not per position): with longest names first, the
  // first prefix hit is also the longest. Empty names can never be a match.
  const namesLongestFirst = oilNames
    .filter((name) => name.length > 0)
    .sort((a, b) => b.length - a.length)

  // Keep only aliases that resolve to an oil the caller actually knows.
  const knownNames = new Set(oilNames)
  const usableAliases = Object.entries(OIL_HOMOPHONES).filter(
    ([alias, canonical]) => alias.length > 0 && knownNames.has(canonical)
  )

  const results = []
  let i = 0
  while (i < text.length) {
    let bestMatch = namesLongestFirst.find((name) => text.startsWith(name, i))
    let bestLen = bestMatch ? bestMatch.length : 0

    // Aliases are a fallback only: a real oil name at this position wins.
    if (!bestMatch) {
      for (const [alias, canonical] of usableAliases) {
        if (alias.length > bestLen && text.startsWith(alias, i)) {
          bestMatch = canonical
          bestLen = alias.length
        }
      }
    }

    if (bestMatch) {
      results.push(bestMatch)
      i += bestLen
    } else {
      i++
    }
  }
  return results
}

/**
 * Parse one text chunk into `{ oil, drops }` entries.
 *
 * The chunk is read as a sequence of "name(s) + amount + optional unit", e.g.
 * "芳香调理8永久花10" or "薰衣草 3滴 茶树 2ml". Within each name segment:
 *   - several known oils may be concatenated; only the last one receives the
 *     amount, the ones before it default to 1 drop;
 *   - if no known oil is found verbatim, findOil is tried on the whole segment;
 *   - otherwise the segment is reported with `notFound: true`.
 * Text with no amount at all, and text after the last amount, is treated as a
 * list of oil names at 1 drop each.
 *
 * Amounts in ml / 毫升 (any letter case) are converted to drops and the
 * original volume is kept on the entry as `_ml`.
 *
 * @param {string} text - chunk to parse
 * @param {string[]} oilNames - known oil names
 * @returns {Array<{oil: string, drops: number, _ml?: number, notFound?: boolean}>}
 */
export function parseOilChunk(text, oilNames) {
  // NOTE(review): this module also exports DROPS_PER_ML (18.6), but the
  // conversion here has always used 20 — confirm which value is intended.
  const ML_TO_DROPS = 20

  const results = []
  // name + number + optional unit. The `i` flag makes every casing of "ml"
  // count as the unit; without it "2Ml" was read as 2 drops and "Ml" leaked
  // into the next oil name. A fresh regex per call keeps `lastIndex` local.
  const amountPattern = /([^\d]+?)(\d+\.?\d*)\s*(ml|毫升|滴)?/gi
  let consumedUpTo = 0
  let match
  while ((match = amountPattern.exec(text)) !== null) {
    consumedUpTo = amountPattern.lastIndex
    const namePart = match[1].trim()
    const amount = parseFloat(match[2])
    const unit = (match[3] || '').toLowerCase()
    const isMl = unit === 'ml' || unit === '毫升'
    const drops = isMl ? Math.round(amount * ML_TO_DROPS) : amount
    const withAmount = (oil) => (isMl ? { oil, drops, _ml: amount } : { oil, drops })

    const matched = greedyMatchOils(namePart, oilNames)
    if (matched.length > 0) {
      // Oils listed before the last one had no amount of their own.
      for (const oil of matched.slice(0, -1)) {
        results.push({ oil, drops: 1 })
      }
      results.push(withAmount(matched[matched.length - 1]))
      continue
    }

    const found = findOil(namePart, oilNames)
    if (found) {
      results.push(withAmount(found))
    } else if (namePart) {
      results.push({ oil: namePart, drops, notFound: true })
    }
  }

  // Whatever carries no amount: the whole text when nothing matched, otherwise
  // the tail after the last amount.
  const remainder = consumedUpTo === 0 ? text.trim() : text.substring(consumedUpTo).trim()
  if (remainder) {
    _parseNamesOnly(remainder, oilNames, results)
  }

  return results
}

/**
 * Handle text that carries oil names but no amounts: every oil recognised is
 * appended to `results` with 1 drop.
 *
 * Verbatim (greedy) matching is tried on the whole text first. Only when that
 * finds nothing is the text split on whitespace, "+", "、" and commas, with
 * each piece resolved through findOil; pieces that resolve to nothing are
 * dropped.
 *
 * @param {string} text - text without amounts
 * @param {string[]} oilNames - known oil names
 * @param {Array<object>} results - output array, appended to in place
 */
function _parseNamesOnly(text, oilNames, results) {
  const verbatim = greedyMatchOils(text, oilNames)
  if (verbatim.length > 0) {
    results.push(...verbatim.map((oil) => ({ oil, drops: 1 })))
    return
  }

  text
    .split(/[\s+、,，]+/)
    .filter((piece) => piece)
    .map((piece) => findOil(piece, oilNames))
    .filter((resolved) => resolved)
    .forEach((resolved) => {
      results.push({ oil: resolved, drops: 1 })
    })
}

/**
 * Cut raw pasted text into recipe blocks.
 *
 * The text is split on semicolons (ASCII or full-width) and each piece is then
 * split on blank lines. Blocks are trimmed and empty ones are discarded.
 *
 * @param {string} raw - pasted text
 * @param {string[]} oilNames - accepted for signature compatibility; not used
 * @returns {string[]} non-empty, trimmed blocks in their original order
 */
export function splitRawIntoBlocks(raw, oilNames) {
  return raw
    .split(/[;；]/)
    .flatMap((segment) => segment.split(/\n\s*\n/))
    .map((block) => block.trim())
    .filter((block) => block.length > 0)
}

/**
 * Parse one recipe block into `{ name, ingredients, notFound }`.
 *
 * 1. The block is split on commas (ASCII or full-width) and line breaks.
 *    Spaces are NOT separators here; they are left for parseOilChunk.
 * 2. A leading part that contains no digit and mentions no oil name or alias
 *    becomes the recipe name. Everything from the first ingredient-like part
 *    onwards is treated as ingredients.
 * 3. Ingredient parts go through parseOilChunk; entries flagged `notFound`
 *    are collected by name in `notFound`.
 * 4. Entries for the same oil are merged by summing their drops, keeping the
 *    position of the first occurrence.
 *
 * @param {string} raw - text of a single recipe
 * @param {string[]} oilNames - known oil names
 * @returns {{name: string, ingredients: Array<object>, notFound: string[]}}
 */
export function parseSingleBlock(raw, oilNames) {
  const parts = raw.split(/[,，\n\r]+/).map((s) => s.trim()).filter((s) => s)

  // Alias keys are loop-invariant, so they are collected once.
  const aliases = Object.keys(OIL_HOMOPHONES)
  const mentionsOil = (part) =>
    oilNames.some((oil) => part.includes(oil)) ||
    aliases.some((alias) => part.includes(alias))

  let name = ''
  const ingredientParts = []
  for (const part of parts) {
    // Only a part seen before any ingredient, and before a name was chosen,
    // can be the recipe name.
    const isName =
      ingredientParts.length === 0 && !name && !/\d/.test(part) && !mentionsOil(part)
    if (isName) {
      name = part
    } else {
      ingredientParts.push(part)
    }
  }

  // A Map (not a plain object) keys the merge, so oil names that collide with
  // Object.prototype members such as "constructor" are handled like any other.
  // NOTE(review): when entries are merged, `_ml` of the first entry is kept
  // as-is while `drops` is summed — confirm how consumers use `_ml`.
  const merged = new Map()
  const notFound = []
  for (const part of ingredientParts) {
    for (const item of parseOilChunk(part, oilNames)) {
      if (item.notFound) {
        notFound.push(item.oil)
        continue
      }
      const existing = merged.get(item.oil)
      if (existing) {
        existing.drops += item.drops
      } else {
        merged.set(item.oil, { ...item })
      }
    }
  }

  return {
    name,
    ingredients: [...merged.values()],
    notFound
  }
}

/**
 * Parse text that may contain several recipes.
 *
 * Splitting happens in stages:
 *   1. Blank lines always separate recipes (each block is parsed recursively).
 *   2. Semicolons separate recipes only when every piece mentions an oil.
 *   3. Otherwise the text is tokenised on commas, "、", semicolons, line breaks
 *      and spaces. A token with no space that embeds an oil name after some
 *      prefix (e.g. "长高芳香调理8") is cut in front of the earliest oil name.
 *      A bare amount token ("3", "3滴", "2ml") that follows an oil token is
 *      re-attached to it, so "薰衣草 3滴" keeps its amount.
 *   4. Tokens are classified in order: a token with no digit and no oil
 *      (exact, alias or fuzzy) that appears after ingredients starts a new
 *      recipe and becomes its name.
 *
 * @param {string} raw - pasted text
 * @param {string[]} oilNames - known oil names
 * @returns {Array<{name: string, ingredients: Array<object>, notFound: string[]}>}
 */
export function parseMultiRecipes(raw, oilNames) {
  // 1. Blank lines are hard recipe boundaries.
  const blankLineSplit = raw.split(/\n\s*\n/).map((s) => s.trim()).filter((s) => s)
  if (blankLineSplit.length > 1) {
    return blankLineSplit.flatMap((block) => parseMultiRecipes(block, oilNames))
  }

  const aliases = Object.keys(OIL_HOMOPHONES)
  const mentionsOil = (text) =>
    oilNames.some((oil) => text.includes(oil)) ||
    aliases.some((alias) => text.includes(alias))

  // 2. Semicolons are boundaries only if every piece mentions an oil.
  const semiParts = raw.split(/[;；]/).map((s) => s.trim()).filter((s) => s)
  if (semiParts.length > 1 && semiParts.every((piece) => mentionsOil(piece))) {
    return semiParts.flatMap((block) => parseMultiRecipes(block, oilNames))
  }

  // 3. Tokenise.
  const bareAmount = /^\d+\.?\d*(?:ml|毫升|滴)?$/i
  const roughParts = raw.split(/[,，、;；\n\r]+/).map((s) => s.trim()).filter((s) => s)
  const parts = []
  for (const rp of roughParts) {
    const spaceParts = rp.split(/\s+/).filter((s) => s)
    if (spaceParts.length > 1) {
      let previous = null
      for (const token of spaceParts) {
        // An amount standing alone cannot be parsed downstream (an amount must
        // follow a name), so it is glued back onto the oil it follows. This is
        // limited to tokens that are nothing but an amount, and to a preceding
        // token that mentions an oil and has no amount yet, so that numbered
        // recipe names such as "方案 2" still start a new recipe.
        const belongsToPrevious =
          previous !== null &&
          bareAmount.test(token) &&
          !/\d/.test(previous) &&
          mentionsOil(previous)
        if (belongsToPrevious) {
          previous = `${previous} ${token}`
          parts[parts.length - 1] = previous
        } else {
          previous = token
          parts.push(token)
        }
      }
      continue
    }

    // Single token: separate a name prefix from the oil text, e.g.
    // "长高芳香调理8" -> "长高" + "芳香调理8".
    let earliest = -1
    for (const oil of oilNames) {
      const pos = rp.indexOf(oil)
      if (pos >= 0 && (earliest < 0 || pos < earliest)) {
        earliest = pos
      }
    }
    if (rp.length > 2 && earliest > 0) {
      parts.push(rp.substring(0, earliest))
      parts.push(rp.substring(earliest))
    } else {
      parts.push(rp)
    }
  }

  // 4. Classify tokens into recipe names and ingredient parts.
  const recipes = []
  let current = { nameParts: [], ingredientParts: [], foundOil: false }

  for (const part of parts) {
    const hasNumber = /\d/.test(part)
    const textPart = part.replace(/\d+\.?\d*/g, '').trim()
    const hasOil = mentionsOil(part)
    // Fuzzy lookup only when nothing matched verbatim and at least two
    // characters of text remain once the numbers are removed.
    const fuzzyOil = !hasOil && textPart.length >= 2 && findOil(textPart, oilNames)
    // Very first token only: it has a number but its text is no oil, so it is
    // taken as a name such as "美容1" (the number is dropped from the name).
    const isFirstNameWithNumber =
      !current.foundOil &&
      current.nameParts.length === 0 &&
      current.ingredientParts.length === 0 &&
      hasNumber &&
      !hasOil &&
      !fuzzyOil &&
      textPart.length >= 2

    if (current.foundOil && !hasOil && !fuzzyOil && !hasNumber && part.length >= 2) {
      // Plain text after ingredients: the next recipe begins here.
      recipes.push(current)
      current = { nameParts: [part], ingredientParts: [], foundOil: false }
    } else if ((!current.foundOil && !hasOil && !fuzzyOil && !hasNumber) || isFirstNameWithNumber) {
      current.nameParts.push(isFirstNameWithNumber ? textPart : part)
    } else {
      current.foundOil = true
      current.ingredientParts.push(part)
    }
  }
  recipes.push(current)

  // Convert each collected recipe into its parsed form.
  return recipes
    .filter((r) => r.ingredientParts.length > 0 || r.nameParts.length > 0)
    .map((r) => {
      // A Map (not a plain object) keys the merge, so oil names that collide
      // with Object.prototype members such as "constructor" cannot crash it.
      const merged = new Map()
      const notFound = []
      for (const p of r.ingredientParts) {
        for (const item of parseOilChunk(p, oilNames)) {
          if (item.notFound) {
            notFound.push(item.oil)
            continue
          }
          const existing = merged.get(item.oil)
          if (existing) {
            existing.drops += item.drops
          } else {
            merged.set(item.oil, { ...item })
          }
        }
      }
      return {
        name: r.nameParts.join(' '),
        ingredients: [...merged.values()],
        notFound,
      }
    })
}