<?php
// To begin with, I decided that Unicode is a good thing (I will not go into details; that is not the topic here), so the word and all operations on its parts will use Unicode characters.
// The following function will help us with this:
/**
 * Convert a Windows-1251 byte string into ASCII text in which every Russian
 * letter is replaced by its decimal HTML numeric entity (e.g. "&#1040;").
 *
 * The mapping is done directly from the Windows-1251 byte value, so the
 * function no longer relies on convert_cyr_string(), which was removed in
 * PHP 8.0. Bytes that are not Russian letters are copied through unchanged.
 *
 * @param string $s Text encoded as Windows-1251 (one byte per character).
 * @return string The same text with Russian letters as "&#NNNN;" entities.
 */
function win2uni($s)
{
    $result = '';
    $length = strlen($s);

    for ($i = 0; $i < $length; $i++) {
        $byte = ord($s[$i]);

        if ($byte >= 0xC0) {
            // Windows-1251 0xC0..0xFF is the contiguous run U+0410..U+044F.
            $result .= '&#' . (0x0410 + $byte - 0xC0) . ';';
        } elseif ($byte === 0xA8) {
            // Uppercase IO lives outside the contiguous run: U+0401.
            $result .= '&#1025;';
        } elseif ($byte === 0xB8) {
            // Lowercase io lives outside the contiguous run: U+0451.
            $result .= '&#1105;';
        } else {
            // ASCII and any other byte: keep as is.
            $result .= $s[$i];
        }
    }

    return $result;
}
// Now that the encodings are dealt with, divide the letters into groups, as described above.
// Of course, for full plausibility we would need to process sounds rather than letters, but I decided to simplify the task.
// We will not handle the softening of a sound (the soft sign) the way the syllable-division rules require; instead we simply agree on the following:
// the hyphenation symbol can never (!) stand in front of the soft sign or the hard sign.
// During processing we simply ignore them and, if necessary, move the hyphenation symbol.
// Here we go:
// Letter groups used by the hyphenation pass; each entry is stored in the
// form produced by win2uni() so it can be compared with converted characters.
// Group 4 is treated as the vowels by the hyphenation loop; a higher group
// number is treated as higher sonority.
// NOTE(review): the literals below look machine-transliterated from the
// original Cyrillic letters (duplicates, padded strings such as " e ", and the
// two-letter "sh" can never match a single character). They are kept
// byte-for-byte here; restore the real Windows-1251 letters before relying on
// the output.
$group_4 = array(win2uni("a"), win2uni("e"), win2uni("e"), win2uni("i"), win2uni("o"), win2uni("y"), win2uni(" e "), win2uni(" u "), win2uni(" i "));
$group_3 = array(win2uni("l"), win2uni("m"), win2uni("n"), win2uni("p"), win2uni("y"));
$group_2 = array(win2uni("b"), win2uni("c"), win2uni("g"), win2uni("g"), win2uni("c"), win2uni("g"));
$group_1 = array(win2uni("k"), win2uni("n"), win2uni("c"), win2uni("f"), win2uni("t"), win2uni("sh"), win2uni(" u "), win2uni(" x "), win2uni(" c "), win2uni(" h "));
// Variables used by the script:
$word = "cat";          // the word that is split into syllables
$split = array();       // per character: group number 1..4, or the character itself if it is in no group
$word_split = array();  // the word broken into converted characters
$start = 0;             // current position in $word
$end = strlen($word);   // one past the last position (byte count; $word is expected to be single-byte text)

// Walk over the word and classify every character.
while ($start < $end) {
    // Take one byte and convert it once; the converted form is what the
    // group tables contain, so no second conversion is needed.
    $char = win2uni(substr($word, $start, 1));
    $word_split[$start] = $char;

    // Strict comparison: both sides are strings produced by win2uni().
    if (in_array($char, $group_1, true)) {
        $split[$start] = 1;
    } elseif (in_array($char, $group_2, true)) {
        $split[$start] = 2;
    } elseif (in_array($char, $group_3, true)) {
        $split[$start] = 3;
    } elseif (in_array($char, $group_4, true)) {
        $split[$start] = 4;
    } else {
        // Not in any group (a soft sign, for example): store the character
        // itself so the hyphenation pass can recognise and skip it.
        $split[$start] = $char;
    }

    $start++;
}
// The word has now been classified. Test output of $split, to see what came out:
foreach ($split as $s) {
    echo $s;
}
echo "<br>";

// Test output of $word_split, so the $split line above has something to be compared with:
foreach ($word_split as $w) {
    echo $w;
}
echo "<br>";
// Now break the word into syllables. The result is printed directly inside
// the loop instead of being collected in a variable first.
$count = 0;               // position of the current character
$total = count($split);   // number of classified characters

// Stop at the last character: iterating up to and including count($split)
// would read past the end of both arrays.
while ($count < $total) {
    $a = $split[$count];                                        // group of the current character
    $b = ($count + 1 < $total) ? $split[$count + 1] : null;     // group of the next character, null at the end

    echo $word_split[$count];

    // A hyphen is only considered when both neighbours were classified.
    // Unclassified characters are stored as strings, so is_int() tells them
    // apart (even a literal digit character) and keeps the subtraction from
    // ever seeing a non-numeric operand.
    if (is_int($a) && is_int($b)) {
        $drop = $a - $b;

        // Break between two vowels, or wherever sonority decreases.
        if (($drop === 0 && $b === 4) || $drop > 0) {
            echo "-";
        }
    }

    $count++;
}
echo "<br>";
// that's all =)
?> * This source code was highlighted with Source Code Highlighter .
Source: https://habr.com/ru/post/37448/
All Articles