import pymorphy2 as py

# Shared analyzer, created on the first call to tags() and reused afterwards.
_morph = None


def tags(word):
    """Return every morphological parse pymorphy2 produces for *word*.

    The result is whatever ``MorphAnalyzer.parse`` returns; the example
    session below indexes it and prints a ``Parse`` object.

    The analyzer is built once and reused: creating a ``MorphAnalyzer`` on
    every call (as the original did) repeats the analyzer set-up each time.
    """
    global _morph
    if _morph is None:
        _morph = py.MorphAnalyzer()
    return _morph.parse(word)


# Example session.
# NOTE(review): the example word is an empty string here although the output
# shows a full adjective parse -- the original word appears to have been lost
# from this listing.
# >>> print(tags('')[0])
# Parse(word='', tag=OpencorporaTag('ADJF,Qual femn,sing,gent'), normal_form='', score=0.125, methods_stack=((<DictionaryAnalyzer>, '', 86, 8),))
# >>> print(tags('')[0].tag.grammemes)
# frozenset({'femn', 'ADJF', 'sing', 'gent', 'Qual'})
import io

# NOTE(review): this literal is a single line, and parseSource() treats a line
# whose first character is '#' as a comment, so as written it yields no
# patterns. The original line breaks and the example phrase appear to have
# been lost from this listing -- restore them from the original article.
source = ''' # # / _- NOUN,nomn VERB NOUN,accs '''


class PPattern:
    """One phrase pattern: an example phrase and the tag spec for each word."""

    def __init__(self):
        super().__init__()
        # Filled in by parseSource(); initialised here so that a pattern
        # without a tag line still has both attributes.
        self.example = None
        self.tags = []


def parseSource(src):
    """Parse pattern definitions from the text *src*.

    The text is read line by line (each line is stripped first):

    * an empty line ends the current pattern;
    * a line whose first character is ``#`` is ignored;
    * the first other line of a pattern is stored as its ``example``;
    * every further line is split on whitespace and stored as ``tags``
      (a later line replaces an earlier one).

    Returns the list of ``PPattern`` objects in the order they appear.
    """
    arr = []
    last = None
    for line in io.StringIO(src):
        s = line.strip()
        if not s:
            last = None
            continue
        if s[0] == '#':
            continue
        if last is None:
            last = PPattern()
            last.example = s
            arr.append(last)
        else:
            last.tags = s.split()
    return arr


s = parseSource(source)
import io

import pymorphy2 as py

# NOTE(review): both literals are single lines here, but the pattern parser and
# parseText() work line by line -- the original line breaks and example text
# appear to have been lost from this listing.
source = ''' # # / _- NOUN,nomn VERB NOUN,accs ADJF NOUN # NOUN,nomn VERB NOUN,loct '''
text = ''' '''


class PPattern:
    """A phrase pattern that can be matched against a line of text.

    ``tags`` holds one spec per expected word; each spec is a comma-separated
    list of grammemes (for example ``'NOUN,nomn'``) that the word must carry.
    """

    # Analyzer shared by all patterns; created on first use so that it is not
    # rebuilt for every pattern and every line.
    _morph = None

    def __init__(self):
        super().__init__()
        self.example = None
        self.tags = []

    @classmethod
    def _analyzer(cls):
        """Return the shared MorphAnalyzer, creating it on first use."""
        if cls._morph is None:
            cls._morph = py.MorphAnalyzer()
        return cls._morph

    def checkPhrase(self, text):
        """Try to match this pattern against the words of *text*.

        Words are taken in order; a word that does not satisfy the currently
        expected tag spec is skipped. A word satisfies a spec when one of its
        parses carries every grammeme of the spec.

        Returns a list of ``(word, parse)`` pairs, one per tag spec, as soon as
        every spec has been matched; returns ``None`` otherwise (including
        when the pattern has no tags).
        """
        if not self.tags:
            return None
        morph = self._analyzer()

        def checkWordTags(tags, grams):
            return all(t in grams for t in tags)

        def checkWord(tags, word):
            # Use the first parse variant that carries all required grammemes.
            for v in morph.parse(word):
                if checkWordTags(tags, v.tag.grammemes):
                    return (word, v)
            return None

        nextTag = 0
        result = []
        for w in text.split():
            res = checkWord(self.tags[nextTag].split(','), w)
            if res is not None:
                result.append(res)
                nextTag += 1
                if nextTag >= len(self.tags):
                    return result
        return None


def parseText(pats, text):
    """Match every non-empty line of *text* against every pattern in *pats*.

    For each pattern that matches a line, prints ``+``, the line, the
    pattern's tags and the matched words; prints ``-`` and the line when no
    pattern matched.
    """
    def parseLine(line):
        was = False
        for p in pats:
            res = p.checkPhrase(line)
            if res:
                print('+', line, p.tags, [r[0] for r in res])
                was = True
        if not was:
            print('-', line)

    for raw in io.StringIO(text):
        s = raw.strip()
        if s != '':
            parseLine(s)


# parseSource() comes from the earlier listing in this file.
patterns = parseSource(source)
parseText(patterns, text)
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ ['ADJF', 'NOUN'] ['', '']
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ ['ADJF', 'NOUN'] ['', '']
# Pattern template for the rule-aware parser: besides the tag line it carries a
# names line ("-a- -b-") and rule lines that start with "=".
# NOTE(review): the literal is a single line here, but the parser expects one
# statement per line -- the original line breaks and example text appear to
# have been lost from this listing; confirm against the original article.
source = ''' # ADJF NOUN -a- -b- # , = a.case = b.case = a.number = b.number = a.gender = b.gender '''
-a- -b-
and rule lines that begin with "=" appeared. In general, I did not bother with the syntax of templates, so each statement lives on one line, and the type of statement is determined by the first character. def parseFunc(v, names): dest = v.split('.') index = names.index(dest[0]) dest = (eval('lambda a: a.' + '.'.join(dest[1:])), index) return dest def parseLine(s): ... elif s[0] == '-': # s = [x.strip('-') for x in s.split()] last.names = s elif s[0] == '=': # s = [x for x in s[1:].split() if x != ''] dest = parseFunc(s[0],last.names) src = parseFunc(s[2],last.names) last.rules.append(((dest[1],src[1]), dest, src)) else: ...
... res = checkWord(self.tags[nextTag].split(','), w) if res is not None: result.append(res) usedP.add(wi) if not self.checkRules(usedP, result): result.remove(res) usedP.remove(wi) else: nextTag = nextTag + 1 if nextTag >= len(self.tags): return (result, usedP) ... def checkRules(self, used, result): for r in self.rules: if max(r[0]) < len(result): destRes = result[r[0][0]] destV = destRes[1] destFunc = r[1][0] srcRes = result[r[0][1]] srcFunc = r[2][0] srcV = srcRes[1] if not self.checkPropRule(destFunc,destV, srcFunc, srcV): return False return True def checkPropRule(self, getFunc, getArgs, srcFunc, srcArgs, \ op = lambda x,y: x == y): v1 = getFunc(getArgs) v2 = srcFunc(srcArgs) return op(v1,v2)
+ ['ADJF', 'NOUN'] ['', '']
+ ['ADJF', 'NOUN'] ['', '']
+ ['NOUN,nomn', 'VERB', 'PREP', 'NOUN,loct'] ['', '', '', '']
+ ['NOUN,nomn', 'VERB', 'PREP', 'NOUN,loct'] ['', '', '', '']
# NOUN Name -a- -b- = a.tag.case = b.tag.case = a.tag.number = b.tag.number
+ - ['NOUN', 'Name'] ['', '']
= a.gender = b.gender
, because the word “younger ones” has no grammatical gender and can refer both to the masculine word “brothers” and to the feminine word “sisters”.
= a.tag.gender is None or a.tag.gender == b.tag.gender
import ast


def parseSource(src):
    """Parse pattern templates (excerpt: only the rule compiler is shown)."""

    def parseFunc(expr, names):
        """Compile the rule expression *expr* into a callable.

        The variables used in *expr* are found by walking its syntax tree.
        Returns ``(indexes, func, lam)`` where ``lam`` is the source text of a
        lambda taking those variables, ``func`` is the compiled lambda and
        ``indexes[i]`` is the position in *names* of the lambda's i-th
        parameter.

        Raises ``ValueError`` (from ``list.index``) when the expression uses a
        name that is not in *names*.
        """
        # A leading blank would make ast.parse() fail with an indentation error.
        expr = expr.strip()
        m = ast.parse(expr)
        # Names used in the expression; sorted so that the parameter order and
        # the generated lambda text are the same on every run.
        varList = sorted({x.id for x in ast.walk(m) if isinstance(x, ast.Name)})
        # Position of every variable in the pattern's list of word names.
        indexes = [names.index(v) for v in varList]
        lam = 'lambda %s: %s' % (','.join(varList), expr)
        # NOTE(security): eval() runs text taken from the template; only use
        # this with templates you trust.
        return (indexes, eval(lam), lam)

    # NOTE(review): the rest of parseSource is not part of this excerpt.
+ ['ADJF', 'NOUN'] ['', '']
# , text = ''' - '''
# source = ''' # # # / _- NOUN,nomn VERB NOUN,accs # -a- -b- -c- = a.tag.number == b.tag.number # :SNOUN # ADJF NOUN -a- -b- # = a.tag.case == b.tag.case = a.tag.number == b.tag.number = a.tag.gender is None or a.tag.gender == b.tag.gender # # NOUN,nomn VERB PREP NOUN,loct -a- -b- -c- -d- = a.tag.number == b.tag.number # VERB INFN # NOUN Name -a- -b- = a.tag.case == b.tag.case = a.tag.number == b.tag.number # NOUN CONJ NOUN -a- -c- -b- = a.tag.case == b.tag.case # NOUN PNCT NOUN -a- -c- -b- = a.tag.case == b.tag.case = c.normal_form == '-' '''
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ ['ADJF', 'NOUN'] ['', '']
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ ['ADJF', 'NOUN'] ['', '']
+ ['ADJF', 'NOUN'] ['', '']
+ ['ADJF', 'NOUN'] ['', '']
+ ['NOUN', 'Name'] ['', '']
+ ['NOUN', 'Name'] ['', '']
+ ['NOUN', 'Name'] ['', '']
+ ['NOUN', 'CONJ', 'NOUN'] ['', '', '']
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ - ['NOUN', 'Name'] ['', '']
+ - ['NOUN', 'PNCT', 'NOUN'] ['', '-', '']
+ - ['NOUN', 'PNCT', 'NOUN'] ['', '-', '']
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ ['NOUN,nomn', 'VERB', 'PREP', 'NOUN,loct'] ['', '', '', '']
+ ['NOUN,nomn', 'VERB', 'NOUN,accs'] ['', '', '']
+ ['ADJF', 'NOUN'] ['', '']
+ ['ADJF', 'NOUN'] ['', '']
+ ['NOUN', 'CONJ', 'NOUN'] ['', '', '']
+ ['NOUN', 'CONJ', 'NOUN'] ['', '', '']
+ ['ADJF', 'NOUN'] ['', '']
+ ['ADJF', 'NOUN'] ['', '']
+ ['NOUN,nomn', 'VERB', 'PREP', 'NOUN,loct'] ['', '', '', '']
+ ['NOUN,nomn', 'VERB', 'PREP', 'NOUN,loct'] ['', '', '', '']
+ ['VERB', 'INFN'] ['', '']
>>> parseText(patterns, ' ')
+ ['ADJF', 'NOUN'] ['', '']
+ ['NOUN', 'Name'] ['', '']
Source: https://habr.com/ru/post/350802/
All Articles