package ru.lexmin.lexm_core.dto;

import java.io.Serializable;

/**
 * Data transfer object that carries a text together with a percentage value.
 *
 * <p>The class is a plain mutable holder: it stores both values as given and performs no
 * validation. It implements {@link Serializable} so that the declared
 * {@code serialVersionUID} actually takes effect.
 */
public class ReceivedText implements Serializable {

    private static final long serialVersionUID = 5716001583591230233L;

    /** The text carried by this object; {@code null} until set. */
    private String text;

    /** The percentage associated with the text; {@code 0} until set. */
    private int percent;

    /**
     * Creates an empty instance: {@code text} is {@code null} and {@code percent} is {@code 0}.
     */
    public ReceivedText() {
        super();
    }

    /**
     * Creates an instance holding the given values.
     *
     * @param text
     *            the text to store
     * @param percent
     *            the percentage to store
     */
    public ReceivedText(String text, int percent) {
        super();
        this.text = text;
        this.percent = percent;
    }

    /**
     * @return the stored text, possibly {@code null}
     */
    public String getText() {
        return text;
    }

    /**
     * Replaces the stored text.
     *
     * @param text
     *            the new text
     */
    public void setText(String text) {
        this.text = text;
    }

    /**
     * @return the stored percentage
     */
    public int getPercent() {
        return percent;
    }

    /**
     * Replaces the stored percentage.
     *
     * @param percent
     *            the new percentage
     */
    public void setPercent(int percent) {
        this.percent = percent;
    }
}
package ru.lexmin.lexm_core.dto;

import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;

/**
 * Data transfer object holding word statistics for a {@link ReceivedText}.
 *
 * <p>It groups three values: the source {@link ReceivedText}, a word count and a map from
 * word to an integer frequency. The class is a plain mutable holder and does not compute or
 * validate anything itself.
 *
 * <p>The class implements {@link Serializable} so that the declared
 * {@code serialVersionUID} takes effect; an instance serializes successfully only when the
 * referenced {@link ReceivedText} and the map implementation are serializable as well.
 */
public class WordStat implements Serializable {

    private static final long serialVersionUID = -1211530860332682161L;

    /** The text object these statistics refer to. */
    private ReceivedText receivedText;

    /** Number of words counted for {@link #receivedText}. */
    private int countOfWords;

    /** Word-to-frequency map for {@link #receivedText}. */
    private Map<String, Integer> frequencyWords;

    /**
     * Creates an empty instance with no text, a zero word count and an empty frequency map.
     *
     * <p>The map is initialised here (as in {@link #WordStat(ReceivedText)}) so that
     * {@link #getFrequencyWords()} does not return {@code null} on a fresh object.
     */
    public WordStat() {
        super();
        this.frequencyWords = new HashMap<>();
    }

    /**
     * Creates a fully populated instance. The arguments are stored as given, without copying.
     *
     * @param receivedText
     *            the text object the statistics refer to
     * @param countOfWords
     *            the number of words
     * @param frequencyWords
     *            the word-to-frequency map
     */
    public WordStat(ReceivedText receivedText, int countOfWords, Map<String, Integer> frequencyWords) {
        this.receivedText = receivedText;
        this.countOfWords = countOfWords;
        this.frequencyWords = frequencyWords;
    }

    /**
     * Creates an instance for the given text with a zero word count and an empty,
     * mutable frequency map.
     *
     * @param receivedText
     *            the text object the statistics refer to
     */
    public WordStat(ReceivedText receivedText) {
        this.receivedText = receivedText;
        this.countOfWords = 0;
        this.frequencyWords = new HashMap<>();
    }

    /**
     * @return the text object these statistics refer to
     */
    public ReceivedText getReceivedText() {
        return receivedText;
    }

    /**
     * Replaces the referenced text object.
     *
     * @param receivedText
     *            the new text object
     */
    public void setReceivedText(ReceivedText receivedText) {
        this.receivedText = receivedText;
    }

    /**
     * @return the stored word count
     */
    public int getCountOfWords() {
        return countOfWords;
    }

    /**
     * Replaces the stored word count.
     *
     * @param countOfWords
     *            the new word count
     */
    public void setCountOfWords(int countOfWords) {
        this.countOfWords = countOfWords;
    }

    /**
     * @return the stored word-to-frequency map (the live instance, not a copy)
     */
    public Map<String, Integer> getFrequencyWords() {
        return frequencyWords;
    }

    /**
     * Replaces the stored word-to-frequency map. The argument is stored as given.
     *
     * @param frequencyWords
     *            the new word-to-frequency map
     */
    public void setFrequencyWords(Map<String, Integer> frequencyWords) {
        this.frequencyWords = frequencyWords;
    }
}
package ru.lexmin.lexm_core;

import ru.lexmin.lexm_core.dto.ReceivedText;
import ru.lexmin.lexm_core.dto.WordStat;

/**
 * Contract for components that derive word statistics from a text.
 */
public interface TextAnalyzer {

    /**
     * Builds a {@link WordStat} for the supplied {@link ReceivedText}.
     *
     * @param receivedText
     *            the text object to analyze
     * @return the statistics produced for {@code receivedText}
     */
    WordStat getWordStat(ReceivedText receivedText);
}
package ru.lexmin.lexm_core;

import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import ru.lexmin.lexm_core.dto.ReceivedText;
import ru.lexmin.lexm_core.dto.WordStat;

/**
 * {@link TextAnalyzer} implementation that counts lower-cased Latin words and reports the
 * most frequent ones.
 *
 * <p>The analysis normalises the text, counts every word, and then selects the most frequent
 * words until their occurrences cover the percentage requested in the {@link ReceivedText}.
 * The class keeps no per-instance state.
 */
public class TextAnalyzerImp implements TextAnalyzer {

    private static final int PERCENT_100 = 100;
    private static final int ONE_WORD = 1;
    private static final String SPACE = " ";

    /** The single apostrophe form kept in the normalised text. */
    private static final String AVAILABLE_APOSTROPHE = "'";

    /**
     * Apostrophe look-alikes (U+2018, U+2019, U+02BC, backtick, plain apostrophe) that are
     * unified to {@link #AVAILABLE_APOSTROPHE}.
     */
    private static final Pattern ANY_APOSTROPHE = Pattern.compile("[\u2018\u2019\u02BC`']");

    /** Every character that is not a lower-case Latin letter, whitespace or an apostrophe. */
    private static final Pattern NOT_LATIN_CHARACTERS = Pattern.compile("[^a-z\\s']");

    /** A run of two or more whitespace characters. */
    private static final Pattern SPACES_MORE_ONE = Pattern.compile("\\s{2,}");

    /**
     * A word: letters, optionally joined by inner apostrophes ({@code he's}). Requiring a
     * leading and trailing letter keeps stray apostrophes from being counted as words.
     */
    private static final Pattern WORD = Pattern.compile("[a-z]+(?:'[a-z]+)*");

    /** Highest count first; equal counts are ordered alphabetically for a stable result. */
    private static final Comparator<Entry<String, Integer>> MOST_FREQUENT_FIRST =
            Entry.<String, Integer>comparingByValue(Comparator.reverseOrder())
                    .thenComparing(Entry.<String, Integer>comparingByKey());

    /**
     * Normalises a text for word extraction: lower-cases it, unifies apostrophes, replaces
     * every character other than a-z, whitespace and the apostrophe with a space, and
     * collapses whitespace runs to a single space.
     *
     * @param text
     *            the raw text, not {@code null}
     * @return the normalised text
     */
    private String filterText(String text) {
        // Locale.ROOT keeps the result independent of the JVM default locale
        // (e.g. Turkish would map 'I' to a dotless i, which is then filtered out).
        String lowered = text.toLowerCase(Locale.ROOT);
        String unifiedApostrophes = ANY_APOSTROPHE.matcher(lowered).replaceAll(AVAILABLE_APOSTROPHE);
        String latinOnly = NOT_LATIN_CHARACTERS.matcher(unifiedApostrophes).replaceAll(SPACE);
        return SPACES_MORE_ONE.matcher(latinOnly).replaceAll(SPACE);
    }

    /**
     * Counts how many times each word occurs in an already normalised text.
     *
     * @param text
     *            text produced by {@link #filterText(String)}
     * @return map from word to its number of occurrences
     */
    private Map<String, Integer> getWordsMap(String text) {
        Map<String, Integer> wordsMap = new HashMap<>();
        Matcher matcherWord = WORD.matcher(text);
        while (matcherWord.find()) {
            // Insert with a count of one, or add one to the existing count.
            wordsMap.merge(matcherWord.group(), ONE_WORD, Integer::sum);
        }
        return wordsMap;
    }

    /**
     * Sums all occurrence counts of the map.
     *
     * @param wordsMap
     *            map from word to its number of occurrences
     * @return total number of word occurrences
     */
    private int getCountOfWords(Map<String, Integer> wordsMap) {
        int countOfWords = 0;
        for (int occurrences : wordsMap.values()) {
            countOfWords += occurrences;
        }
        return countOfWords;
    }

    /**
     * Selects the most frequent words until their occurrences cover at least
     * {@code percent} percent of {@code countOfWords}.
     *
     * <p>Coverage is tracked as an exact running total of occurrences. Summing per-word
     * integer percentages would truncate every word below 1% to zero, so the threshold
     * would never be reached on larger texts.
     *
     * @param wordsMap
     *            map from word to its number of occurrences
     * @param countOfWords
     *            total number of word occurrences in {@code wordsMap}
     * @param percent
     *            share of occurrences to cover; values of 0 or less select nothing, values
     *            of 100 or more select every word
     * @return the selected words with their counts, ordered from most to least frequent
     */
    private Map<String, Integer> filterWordsMap(Map<String, Integer> wordsMap, int countOfWords, int percent) {
        // LinkedHashMap keeps the frequency ordering in which the entries are inserted.
        Map<String, Integer> resultMap = new LinkedHashMap<>();

        Stream<Entry<String, Integer>> streamWords = wordsMap.entrySet().stream().sorted(MOST_FREQUENT_FIRST);
        Iterator<Entry<String, Integer>> iterator = streamWords.iterator();

        // Compare covered * 100 with percent * total in long arithmetic: no division, no overflow.
        long requiredScaled = (long) percent * countOfWords;
        long coveredOccurrences = 0;
        while (iterator.hasNext() && coveredOccurrences * PERCENT_100 < requiredScaled) {
            Entry<String, Integer> wordEntry = iterator.next();
            resultMap.put(wordEntry.getKey(), wordEntry.getValue());
            coveredOccurrences += wordEntry.getValue();
        }
        return resultMap;
    }

    /**
     * Builds the word statistics for the given text object.
     *
     * <p>The returned {@link WordStat} references {@code receivedText}, carries the total
     * number of word occurrences, and maps the most frequent words (covering
     * {@code receivedText.getPercent()} percent of all occurrences) to their counts. A
     * {@code null} text is analysed as an empty text.
     *
     * @param receivedText
     *            the text object to analyze, not {@code null}
     * @return the statistics for {@code receivedText}
     * @throws NullPointerException
     *             if {@code receivedText} is {@code null}
     */
    @Override
    public WordStat getWordStat(ReceivedText receivedText) {
        Objects.requireNonNull(receivedText, "receivedText");

        WordStat wordStat = new WordStat(receivedText);
        String text = receivedText.getText();
        Map<String, Integer> wordsMap = getWordsMap(filterText(text == null ? "" : text));

        int countOfWords = getCountOfWords(wordsMap);
        wordStat.setCountOfWords(countOfWords);
        wordStat.setFrequencyWords(filterWordsMap(wordsMap, countOfWords, receivedText.getPercent()));
        return wordStat;
    }
}
package testText;

import ru.lexmin.lexm_core.TextAnalyzer;
import ru.lexmin.lexm_core.TextAnalyzerImp;
import ru.lexmin.lexm_core.dto.ReceivedText;
import ru.lexmin.lexm_core.dto.WordStat;

/**
 * Small console demo: analyzes a fixed sample sentence with {@link TextAnalyzerImp} and
 * prints the word count, the size of the frequency map and the words it contains.
 */
public class Main {

    public static void main(String[] args) {
        final int percent = 80;
        String sample = "There's nothing to tell! He's .... ";

        TextAnalyzer analyzer = new TextAnalyzerImp();
        WordStat stat = analyzer.getWordStat(new ReceivedText(sample, percent));

        System.out.println(" : " + stat.getCountOfWords());
        System.out.println(" , 80% : " + stat.getFrequencyWords().size());
        System.out.println(" , 80% ");

        // One word per line, in the iteration order of the frequency map.
        for (String word : stat.getFrequencyWords().keySet()) {
            System.out.println(word);
        }
    }
}
Source: https://habr.com/ru/post/282750/
All Articles