/**
 * A node of a character trie. Each node holds one character and an ordered
 * list of children; a string is stored as a path of nodes, one per character.
 */
class Node {
    /** Character held by this node; a newly constructed node starts out as START. */
    char ch = START;

    /** Children of this node, appended in the order they are first created. */
    List<Node> nodes = Lists.newArrayList();

    /**
     * Stores {@code str} beneath this node, descending one level per character.
     * An existing child with the same character is reused; otherwise a new
     * child is created and appended. An empty string changes nothing.
     *
     * @param str the remaining characters to insert below this node
     */
    void add(String str) {
        Node cursor = this;
        for (int i = 0; i < str.length(); i++) {
            final char wanted = str.charAt(i);

            // Look for a child that already represents this character.
            Node next = null;
            for (Node candidate : cursor.nodes) {
                if (candidate.ch == wanted) {
                    next = candidate;
                    break;
                }
            }

            // None found: start a new branch for the rest of the string.
            if (next == null) {
                next = new Node();
                next.ch = wanted;
                cursor.nodes.add(next);
            }
            cursor = next;
        }
    }

    // Body elided in this listing.
    String toRegexp() {...}
}
String toRegexp() { StringBuilder str = new StringBuilder(); if (ch == START) { } else if (ch == END) { } else { //convert special characters like {}[]. String newStr = escapeRegexp(String.valueOf(ch)); str.append(newStr); } if (nodes.size() > 1) { str.append("(?:"); for (Node n : nodes) { str.append(""); str.append(n.toRegexp()); str.append("|"); } str.setLength(str.length() - 1); str.append(')'); } else if (nodes.size() == 1) { str.append(nodes.get(0).toRegexp()); } return str.toString(); } }
/**
 * Builds one regular expression that matches the given strings by inserting
 * them into a {@code Node} trie and rendering the trie with
 * {@code Node.toRegexp()}.
 *
 * <p>The strings are inserted longest first, ties broken by natural string
 * order. Insertion order determines the order of the alternatives in the
 * result, because the trie keeps children in the order they were created.
 *
 * <p>The caller's array is left untouched: sorting is done on a private copy.
 *
 * @param useNonCapturingGroups NOTE(review): this flag is never read here;
 *        the visible {@code Node.toRegexp()} always emits {@code (?:...)}
 *        groups regardless of its value
 * @param strs the strings the resulting expression should match
 * @return the regular expression text; empty when no strings are given
 * @throws NullPointerException if {@code strs} is {@code null}
 */
public static String convertListToRegexp(final boolean useNonCapturingGroups, String... strs) {
    // With varargs the caller may hand over its own array; sort a copy so the
    // caller's element order is not changed as a side effect.
    final String[] sorted = strs.clone();
    Arrays.sort(sorted, new Comparator<String>() {
        public int compare(String o1, String o2) {
            // Lengths are never negative, so this subtraction cannot overflow.
            int res = o2.length() - o1.length();
            if (res != 0) {
                return res;
            }
            return o1.compareTo(o2);
        }
    });

    Node root = new Node();
    for (String str : sorted) {
        // "$" marks the end of each entry; presumably it corresponds to the
        // END sentinel checked in Node.toRegexp() -- confirm against its definition.
        root.add(str + "$");
    }
    return root.toRegexp();
}
//create array of your entries String[] examples = new String[]{"javvva", "javggaaa", "javajava", "adsasd", "adasddsa"}; //convert them to optimal regexp String optimizedRegexp = RegExpUtils.convertListToRegexp(true, examples); Assert.assertEquals("(?:ad(?:asddsa|sasd)|jav(?:ajava|ggaaa|vva))", optimizedRegexp); //check that it is works for(String s : examples) Assert.assertTrue(s.matches(optimizedRegexp));
Source: https://habr.com/ru/post/117177/
All Articles