class Trie( val ends: Boolean = false, // whether this node is end of some string private val parent: Trie = null, private val childs : TreeMap[Char, Trie] = null, val value : Char = 0x0)
/// exact search def contains(s: String) = { @tailrec def impl(t: Trie, pos: Int): Boolean = if (s.size < pos) false else if (s.size == pos) t.ends else if (t.childs == null) false else if (t.childs.contains(s(pos)) == false) false else impl(t.childs(s(pos)), pos + 1) impl(this, 0) }
/// insertion def +(s: String) = { def insert(trie: Trie, pos: Int = 0) : Trie = if (s.size < pos) trie else if (pos == 0 && trie.contains(s)) trie else if (s.size == pos) if (trie.ends) trie else new Trie(true, trie.parent, trie.childs, trie.value) else { val c = s(pos) val children = Option(trie.childs).getOrElse(TreeMap.empty[Char, Trie]) val child = children.getOrElse( c, new Trie(s.size == pos + 1, trie, null, c)) new Trie( trie.ends, trie.parent, children + (c, insert(child, pos + 1)), trie.value) } insert(this, 0) }
object Trie { def apply(seq: Iterator[String]) : Trie = seq.filter(_.nonEmpty).foldLeft(new Trie)(_ + _) def apply(seq: Seq[String]) : Trie = apply(seq.iterator) }
case class Variant(val pos: Int, val node: Trie)(val penalty: Int)
def prefixes(toFind: String): Stream[Variant]
def streamGen[Ctx, A] (init: Ctx)(gen: Ctx => Option[(A, Ctx)]): Stream[A] = { val op = gen(init) if (op.isEmpty) Stream.Empty else op.get._1 #:: streamGen(op.get._2)(gen) }
private class Context( // immutable priority queue, Map of (penalty-, prefix pos+) -> List[Variant] val q: TreeMap[(Int, Int), List[Variant]], // immutable visited nodes cache val cache: HashSet[Variant]) { // extract from 'q' value with lowest penalty and greatest prefix position def pop: (Option[Variant], Context) = { if (q.isEmpty) (None, this) else { val (key, list) = q.head if (list.tail.nonEmpty) (Some(list.head), new Context(q - key + (key, list.tail), cache)) else (Some(list.head), new Context(q - key, cache)) } } // enqueue nodes def ++(vars: Seq[Variant]) = { val newq = vars.filterNot(cache.contains).foldLeft(q) { (q, v) => val key = (v.penalty, v.pos) if (q.contains(key)) { val l = q(key); q - key + (key, v :: l) } else q + (key, v :: Nil) } new Context(newq, cache) } // searches node in cache def apply(v: Variant) = cache(v) // adds node to cache; it marks it as visited def addCache(v: Variant) = new Context(q, cache + v) } private object Context { def apply(init: Variant) = { // ordering of prefix queue: min by penalty, max by prefix pos val ordering = new Ordering[(Int, Int)] { def compare(v1: (Int, Int), v2: (Int, Int)) = if (v1._1 == v2._1) v2._2 - v1._2 else v1._1 - v2._1 } new Context( TreeMap(((init.penalty, init.pos), init :: Nil))(ordering), HashSet.empty[Variant]) } }
// impresize search lookup, returns stream of prefix match variants with lowest penalty def prefixes(toFind: String) : Stream[Variant] = { val init = Variant(0, this)(0) // returns first unvisited node @tailrec def whileCached(ctx: Context): (Option[Variant], Context) = { val (v, ctx2) = ctx.pop if (v.isEmpty) (v, ctx2) else if (!ctx2(v.get)) (Some(v.get), ctx2) else whileCached(ctx2) } // generates graph edges from current node def genvars(v: Variant): List[Variant] = { val replacePass: List[Variant] = if (v.node.childs == null) Nil else v.node.childs.toList flatMap { pair => val (key, child) = pair val pass = Variant(v.pos, child)(v.penalty + 1) :: Nil if (v.pos < toFind.length) Variant(v.pos + 1, child)(v.penalty + {if (toFind(v.pos) == key) 0 else 1}) :: pass else pass } if (v.pos != toFind.length) { Variant(v.pos + 1, v.node)(v.penalty + 1) :: replacePass } else replacePass } streamGen(Context(init)) { ctx => val (best, ctx2) = whileCached(ctx) best.map { v => (v, (ctx2 ++ genvars(v)).addCache(v)) } } }
val pass = Variant(v.pos, child)(v.penalty + 1)
Variant(v.pos + 1, child)(v.penalty + {if (toFind(v.pos) == key) 0 else 1})
Variant(v.pos + 1, v.node)(v.penalty + 1)
def limitedPrefixes(toFind: String, penaltyLimit: Int = 5): Stream[Variant] = { prefixes(toFind).takeWhile(_.penalty < penaltyLimit) }
def matches(toFind: String, penaltyLimit: Int = 5): Stream[Variant] = { limitedPrefixes(toFind, penaltyLimit).filter { v => v.node.ends && v.pos == toFind.size } }
def makeString() : String = { @tailrec def helper(t: Trie, sb: StringBuilder): String = if (t == null || t.parent == null) sb.result.reverse else { helper(t.parent, sb += t.value) } helper(this, new StringBuilder) }
def matchValues(toFind: String, penaltyLimit: Int = 5): Stream[String] = { matches(toFind, penaltyLimit).map(_.node.makeString) }
matches(toFind).take(k).map(_.node.makeString)
matches(toFind).takeWhile(_.penalty < d).map(_.node.makeString)
Source: https://habr.com/ru/post/345002/
All Articles