$ mkdir firewind
$ cd firewind
$ touch core.php
<?php

/**
 * Skeleton of the firewind class: it only carries a version string so far.
 */
class firewind
{
    /** Version string of the library. */
    public $VERSION = "1.0.0";

    /**
     * Creates the object; there is nothing to initialise yet.
     */
    public function __construct()
    {
    }
}

?>
# Unpack the phpMorphy library
$ unzip phpmorphy-0.3.7.zip
$ mv phpmorphy-0.3.7 phpmorphy
# Unpack the dictionary into phpmorphy/dicts
$ unzip morphy-0.3.x-ru_RU-withjo-utf-8.zip -d phpmorphy/dicts/
# Remove the archives that are no longer needed
$ rm phpmorphy-0.3.7.zip morphy-0.3.x-ru_RU-withjo-utf-8.zip
<?php

require_once __DIR__.'/phpmorphy/src/common.php';

/**
 * Wrapper around phpMorphy that splits a text into words and returns
 * the base forms (lemmas) of a word.
 */
class morphyus
{
    /** phpMorphy instance, created in the constructor. */
    private $phpmorphy = null;

    /**
     * Matches a single word: a run of Unicode letters and digits.
     * Unicode classes are used so that Cyrillic words (the dictionary is
     * ru_RU) are matched as well; punctuation, including hyphens, separates
     * words.
     */
    private $regexp_word = '/([\p{L}\p{N}]+)/u';

    /** Matches a named or numeric HTML entity such as `&nbsp;` or `&#160;`. */
    private $regexp_entity = '/&(#?[a-zA-Z0-9]+);/';

    /**
     * Creates the phpMorphy instance for the ru_RU dictionary stored in
     * the `phpmorphy/dicts` directory next to this file.
     */
    function __construct()
    {
        $directory = __DIR__.'/phpmorphy/dicts';
        $language = 'ru_RU';
        $options = [ 'storage' => PHPMORPHY_STORAGE_FILE ];

        $this->phpmorphy = new phpMorphy( $directory, $language, $options );
    }

    /**
     * Splits a text into upper-cased words.
     *
     * @param {string}  content Source text
     * @param {boolean} filter  When true, HTML tags are stripped and HTML
     *                          entities are replaced with spaces first
     * @return {array} List of words in order of appearance (may contain
     *                 duplicates); empty when nothing matches
     */
    public function get_words( $content, $filter=true )
    {
        if ( $filter ) {
            $content = strip_tags( $content );
            $content = preg_replace( $this->regexp_entity, ' ', $content );
        }

        // Words are upper-cased before they are matched and returned.
        $content = mb_strtoupper( $content, 'UTF-8' );

        // preg_match_all() yields 0 when nothing matches and false on a
        // PCRE error (for example malformed UTF-8); both mean "no words".
        if ( !preg_match_all( $this->regexp_word, $content, $words_src ) ) {
            return [];
        }

        return $words_src[ 1 ];
    }

    /**
     * Returns the base forms of a word as reported by phpMorphy.
     *
     * @param {string} word Word to lemmatize
     * @return {array|boolean} Whatever phpMorphy::lemmatize() returns:
     *                         an array of lemmas, or false
     */
    public function lemmatize( $word )
    {
        return $this->phpmorphy->lemmatize( $word );
    }
}

?>
<?php

require_once __DIR__.'/phpmorphy/src/common.php';

class morphyus
{
    private $phpmorphy = null;
    private $regexp_word = '/([\p{L}\p{N}]+)/u';
    private $regexp_entity = '/&([a-zA-Z0-9]+);/';

    // ... (other members omitted) ...

    /**
     * Rates how significant a word is, based on its part of speech.
     *
     * @param {string} word    Word to rate
     * @param {array}  profile Optional map "part-of-speech tag => weight";
     *                         the 'DEFAULT' key is used for tags that are
     *                         not listed and for unknown words
     * @return {integer} Weight of the word; 0 to 5 with the built-in profile
     */
    public function weigh( $word, $profile=false )
    {
        $partsOfSpeech = $this->phpmorphy->getPartOfSpeech( $word );

        if ( !$profile ) {
            // NOTE(review): the tags below are assumed to be the ru_RU
            // part-of-speech codes reported by phpMorphy - verify them
            // against the installed dictionary.
            $profile = [
                // Function words carry no meaning for the search
                'ПРЕДЛ' => 0,   // preposition
                'СОЮЗ'  => 0,   // conjunction
                'МЕЖД'  => 0,   // interjection
                'ВВОДН' => 0,   // parenthetical word
                'ЧАСТ'  => 0,   // particle
                'МС'    => 0,   // pronoun

                // The most significant parts of speech
                'С'     => 5,   // noun
                'Г'     => 5,   // verb
                'П'     => 3,   // adjective
                'Н'     => 3,   // adverb

                // Everything else
                'DEFAULT' => 1
            ];
        }

        // A caller-supplied profile may omit 'DEFAULT'; fall back to 1.
        $default = isset( $profile[ 'DEFAULT' ] ) ? $profile[ 'DEFAULT' ] : 1;

        // Nothing is known about the word.
        if ( !$partsOfSpeech ) {
            return $default;
        }

        // A word may belong to several parts of speech; the highest weight wins.
        $range = [];
        foreach ( $partsOfSpeech as $partOfSpeech ) {
            $range[] = isset( $profile[ $partOfSpeech ] )
                ? $profile[ $partOfSpeech ]
                : $default;
        }

        return max( $range );
    }
}

?>
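{
    "range" : "<range coefficient of the indexed text>",
    "words" : [
        // One entry per indexed word
        {
            "source" : "<the word as it first appeared in the text>",
            "range"  : "<rank of the word: range coefficient * count * weight>",
            "count"  : "<number of occurrences in the text>",
            "weight" : "<weight of the word>",
            "basic"  : [
                // Base forms (lemmas) of the word
            ]
        }
    ]
}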
<?php

require_once 'morphyus.php';

/**
 * Builds a weighted word index of a text with the help of morphyus.
 */
class firewind
{
    public $VERSION = "1.0.0";

    /** morphyus instance used to split, weigh and lemmatize words. */
    private $morphyus;

    function __construct()
    {
        $this->morphyus = new morphyus;
    }

    /**
     * Builds the index of a text.
     *
     * Every word with a positive weight is recorded once. Repeated
     * occurrences of the same word, and words whose lemmas are all among
     * the lemmas of an already recorded word, increase that record's
     * counter and rank instead of creating a new record.
     *
     * @param {string}  content Text to index
     * @param {integer} [range] Range coefficient that multiplies every rank
     * @return {object} Object with `range` (the coefficient) and `words`,
     *                  a list of records {source, count, range, weight, basic}
     */
    public function make_index( $content, $range=1 )
    {
        $index = new stdClass;
        $index->range = $range;
        $index->words = [];

        // Source word => position of its record in $index->words, so that
        // repeated words are found without scanning the whole list.
        $positions = [];

        foreach ( $this->morphyus->get_words( $content ) as $word ) {
            $weight = $this->morphyus->weigh( $word );

            // Words with a non-positive weight are not indexed.
            if ( $weight <= 0 ) {
                continue;
            }

            // The very same word has been seen before.
            if ( isset( $positions[ $word ] ) ) {
                $this->count_occurrence( $index->words[ $positions[ $word ] ], $range );
                continue;
            }

            // Look for a recorded word that covers all lemmas of this one.
            $lemma = $this->morphyus->lemmatize( $word );

            if ( $lemma ) {
                foreach ( $index->words as $node ) {
                    if ( $node->basic && count( array_diff( $lemma, $node->basic ) ) === 0 ) {
                        $this->count_occurrence( $node, $range );
                        continue 2;
                    }
                }
            }

            // Nothing matched: start a new record for the word.
            $node = new stdClass;
            $node->source = $word;
            $node->count = 1;
            $node->range = $range * $weight;
            $node->weight = $weight;
            $node->basic = $lemma;

            $positions[ $word ] = count( $index->words );
            $index->words[] = $node;
        }

        return $index;
    }

    /**
     * Registers one more occurrence of a recorded word and recomputes its rank.
     */
    private function count_occurrence( $node, $range )
    {
        $node->count++;
        $node->range = $range * $node->count * $node->weight;
    }
}

?>
<?php

$range = <range coefficient> * <number of occurrences> * <weight of the word>;

// For example:
$index->words[ $i ]->range = $range * $index->words[ $i ]->count * $index->words[ $i ]->weight;

?>
<?php

// Benchmark: measures how long it takes to index ./source.html.

require_once '../src/core.php';

$firewind = new firewind;

// Load the document to index; without it the measurement is meaningless.
$source = file_get_contents( './source.html' );

if ( $source === false ) {
    die( "Cannot read ./source.html\n" );
}

// Remember when indexing starts.
$begin_time = microtime( true );
echo "Indexing started: $begin_time\n";

// Build the index.
$index = $firewind->make_index( $source );

// Remember when indexing finishes.
$finish_time = microtime( true );
echo "Indexing finished: $finish_time\n";

// Report the elapsed time in seconds.
$total_time = $finish_time - $begin_time;
echo "Total time: $total_time\n";

?>
$ php benchmark.php Indexing started: 1417343592.3094 Indexing finished: 1417343593.5604 Total time: 1.2510349750519
<?php

require_once 'morphyus.php';

class firewind
{
    public $VERSION = "1.0.0";
    private $morphyus;

    // ... (other members omitted) ...

    /**
     * Rates how well an index matches a query.
     *
     * Every query word is compared with every indexed word. An exact match
     * of the source words adds the indexed word's rank once. Otherwise, the
     * rank is added once for every lemma of the query word that is also a
     * lemma of the indexed word.
     *
     * @param {object} target Index of the search query
     * @param {object} index  Index to search in
     * @return {integer} Total rank; 0 when nothing matches or when either
     *                   argument has no `words` (for example a null index)
     */
    public function search( $target, $index )
    {
        // A missing index (e.g. decoded from a NULL column) matches nothing.
        if ( !isset( $target->words, $index->words ) ) {
            return 0;
        }

        $total_range = 0;

        foreach ( $target->words as $target_word ) {
            foreach ( $index->words as $index_word ) {
                if ( $index_word->source === $target_word->source ) {
                    // Same source word.
                    $total_range += $index_word->range;
                } else if ( $index_word->basic && $target_word->basic ) {
                    // Different words: compare their base forms.
                    foreach ( $target_word->basic as $target_lemma ) {
                        if ( in_array( $target_lemma, $index_word->basic, true ) ) {
                            $total_range += $index_word->range;
                        }
                    }
                }
            }
        }

        return $total_range;
    }
}

?>
-- Products. One row per product.
CREATE TABLE `production` (
    `uid` INT NOT NULL AUTO_INCREMENT,      -- Product identifier (primary key)
    `name` VARCHAR(45) NOT NULL,            -- Product name
    `manufacturer` VARCHAR(45) NOT NULL,    -- Manufacturer
    `price` INT NOT NULL,                   -- Price
    `keywords` TEXT NULL,                   -- JSON-encoded index of the product keywords
    PRIMARY KEY ( `uid` )
);

SHOW COLUMNS FROM `production`;
+--------------+-------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+--------------+-------------+------+-----+---------+-------+
| uid | int(11) | NO | PRI | NULL | |
| name | varchar(45) | NO | | NULL | |
| manufacturer | varchar(45) | NO | | NULL | |
| price | int(11) | NO | | NULL | |
| keywords | text | YES | | NULL | |
+--------------+-------------+------+-----+---------+-------+
-- Product descriptions together with their search index.
CREATE TABLE `description` (
    `uid` INT NOT NULL AUTO_INCREMENT,      -- Description identifier (primary key)
    `fid` INT NOT NULL,                     -- `uid` of the `production` row this description belongs to
    `description` LONGTEXT NOT NULL,        -- Description text
    `index` TEXT NULL,                      -- JSON-encoded index of the description
    PRIMARY KEY ( `uid` )
);

SHOW COLUMNS FROM `description`;
+-------------+----------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+-------------+----------+------+-----+---------+-------+
| uid | int(11) | NO | PRI | NULL | |
| fid | int(11) | NO | | NULL | |
| description | longtext | NO | | NULL | |
| index | text | YES | | NULL | |
+-------------+----------+------+-----+---------+-------+
<?php

require_once 'firewind/core.php';

$firewind = new firewind;

$connection = new mysqli( 'host', 'user', 'password', 'database' );

if ( $connection->connect_error ) {
    die( 'Cannot connect to database.' );
}

$connection->set_charset( 'UTF8' );

/**
 * Stores a product together with the search indexes of its description
 * and keywords.
 *
 * The product row and the description row are written in one transaction,
 * so a failure never leaves a product without its description. The script
 * is terminated with a message when anything goes wrong.
 *
 * @param {string}  name         Product name
 * @param {string}  manufacturer Manufacturer
 * @param {integer} price        Price
 * @param {string}  description  Description text; indexed with range 1
 * @param {string}  keywords     Keywords; indexed with range 2 and stored
 *                               only as an index
 * @return {boolean} true on success
 */
function add_product( $name, $manufacturer, $price, $description, $keywords )
{
    global $firewind, $connection;

    // Index the description.
    $description_index = json_encode( $firewind->make_index( $description ) );

    // Index the keywords; they rank twice as high as the description.
    $keywords_index = json_encode( $firewind->make_index( $keywords, 2 ) );

    // json_encode() returns false when the data cannot be encoded
    // (for example malformed UTF-8); never store that as an index.
    if ( $description_index === false || $keywords_index === false ) {
        die( "Cannot encode index!\n" );
    }

    $production_query = $connection->prepare(
        "INSERT INTO `production` ( `name`, `manufacturer`, `price`, `keywords` ) VALUES ( ?, ?, ?, ? )"
    );
    // LAST_INSERT_ID() links the description to the product inserted just before it.
    $description_query = $connection->prepare(
        "INSERT INTO `description` ( `fid`, `description`, `index` ) VALUES ( LAST_INSERT_ID(), ?, ? )"
    );

    if ( !$production_query || !$description_query ) {
        die( "Cannot prepare requests!\n" );
    }

    // Both rows must be written together or not at all.
    $connection->begin_transaction();

    $saved =
        $production_query -> bind_param( 'ssis', $name, $manufacturer, $price, $keywords_index ) &&
        $description_query -> bind_param( 'ss', $description, $description_index ) &&
        $production_query -> execute() &&
        $description_query -> execute() &&
        $connection->commit();

    $production_query -> close();
    $description_query -> close();

    if ( !$saved ) {
        $connection->rollback();
        die( "An error occurred while executing query...\n" );
    }

    echo( "Product successfully added!\n" );

    return true;
}

?>
<?php

// Search page: ranks every product against the `query` GET parameter and
// prints the matching products, best match first.

require_once '../src/core.php';

$firewind = new firewind;

$connection = new mysqli( 'host', 'user', 'password', 'database' );

if ( $connection->connect_error ) {
    die( 'Cannot connect to database.' );
}

$connection->set_charset( 'UTF8' );

// Read the search query.
$query = isset( $_GET[ 'query' ] ) ? trim( $_GET[ 'query' ] ) : false;

if ( $query ) {
    // Index the query the same way the stored texts were indexed.
    $query_index = $firewind->make_index( $query );

    // Fetch every product with the index of its description. A description
    // refers to its product through `fid`, not through its own `uid`.
    $production = $connection->query("
        SELECT p.`uid`, p.`name`, p.`keywords`, d.`index`
        FROM `production` p
        JOIN `description` d ON d.`fid` = p.`uid`
    ");

    if ( !$production ) {
        die( "Cannot get production info.\n" );
    }

    // Product uid => total rank, only for products that match.
    $result = [];

    while ( $product = $production->fetch_assoc() ) {
        $range = 0;

        // Both columns are nullable; skip a missing or malformed index.
        foreach ( [ 'keywords', 'index' ] as $column ) {
            if ( $product[ $column ] === null ) {
                continue;
            }

            $stored_index = json_decode( $product[ $column ] );

            if ( is_object( $stored_index ) && isset( $stored_index->words ) ) {
                $range += $firewind->search( $query_index, $stored_index );
            }
        }

        if ( $range > 0 ) {
            $result[ $product[ 'uid' ] ] = $range;
        }
    }

    if ( $result ) {
        // Highest rank first, keeping the uid keys.
        arsort( $result );

        $i = 1;

        foreach ( $result as $uid => $range ) {
            printf( "#%d. Found product with id %d and range %d.\n", $i++, $uid, $range );
        }
    } else {
        echo( "Sorry, no results found.\n" );
    }
} else {
    echo( "Query cannot be empty. Try again.\n" );
}

?>
Source: https://habr.com/ru/post/244561/