<?php

namespace app\components;

use Yii;
use yii\base\Object;

/**
 * Skeleton of the XenForo parser component.
 *
 * The class body is intentionally empty here; it only establishes the
 * namespace, the imports and the base class.
 */
class ParserXenforo extends Object
{
}
<?php

namespace app\components;

use Yii;
use \yii\base\Object;

/**
 * Yii component holding the connection settings and the parsing state
 * for a XenForo forum page.
 *
 * The public properties are meant to be filled from the application
 * configuration; the private ones hold intermediate and final results.
 */
class ParserXenforo extends Object
{
    /**
     * Request path of the login action, appended to {@see $host}.
     */
    const REQUEST_URI_LOGIN = 'login/login';

    /**
     * Default file name of the cookie file.
     */
    const COOKIES_FILE_NAME = 'cookies.txt';

    /**
     * @var string raw data of the loaded page
     */
    private $_data;

    // The properties below are assigned by the component's DOM/parse methods.
    // They are declared explicitly because yii\base\Object::__set() rejects
    // writes to properties that are not declared on the class.

    /**
     * @var \DOMDocument document built from the loaded data
     */
    private $_dom;

    /**
     * @var \DOMXPath XPath helper bound to the document
     */
    private $_xpath;

    /**
     * @var string parsed title
     */
    private $_title;

    /**
     * @var string parsed timestamp
     */
    private $_timestamp;

    /**
     * @var string parsed content
     */
    private $_content;

    /**
     * @var string base URL of the forum; the login path is appended to it as-is
     */
    public $host;

    /**
     * @var string login name
     */
    public $username;

    /**
     * @var string password
     */
    public $password;

    /**
     * @var array cURL options
     */
    public $curlOpt;
}
/**
 * Reads one of the supported entries from the configured cURL options.
 *
 * @param string $nameOpt option key; only 'userAgent' and 'header' are served
 * @return mixed the configured value, or false for any other key
 */
protected function getCurlOpt($nameOpt)
{
    $isSupported = $nameOpt === 'userAgent' || $nameOpt === 'header';

    return $isSupported ? $this->curlOpt[$nameOpt] : false;
}
/**
 * Builds the login URL by appending the login request path to the host.
 *
 * @return string host immediately followed by REQUEST_URI_LOGIN (no separator is inserted)
 */
protected function getLoginUrl()
{
    $loginUrl = $this->host;
    $loginUrl .= self::REQUEST_URI_LOGIN;

    return $loginUrl;
}
/**
 * Builds the url-encoded body of the login POST request.
 *
 * The credentials are percent-encoded so that characters such as '&',
 * '=', '+' or '%' in the user name or password cannot break the
 * key/value structure of the request body.
 *
 * @return string body of the form "login=...&password=...&remember=1"
 */
protected function createPostRequestForCurl()
{
    // Cast to string so that an unset credential is still sent as an empty
    // field (http_build_query() silently drops null values).
    $fields = [
        'login' => (string) $this->username,
        'password' => (string) $this->password,
        'remember' => 1,
    ];

    // The separator is passed explicitly so the result does not depend on
    // the arg_separator.output ini setting.
    return http_build_query($fields, '', '&');
}
/**
 * Returns the path of the cookie file inside the application runtime directory.
 *
 * @param string $cookieFileName file name; defaults to COOKIES_FILE_NAME
 * @return string the resolved '@app/runtime' alias joined with the file name
 */
protected function getPathToCookieFile($cookieFileName = self::COOKIES_FILE_NAME)
{
    $runtimeDir = Yii::getAlias('@app/runtime');

    return implode(DIRECTORY_SEPARATOR, [$runtimeDir, $cookieFileName]);
}
/**
 * Sends the login POST request and stores the response body.
 *
 * The request goes to the login URL with $url sent as the Referer header,
 * and redirects are followed.
 * NOTE(review): presumably the forum redirects back to the referer after a
 * successful login, so the stored body is the page at $url - confirm.
 *
 * The transfer is executed exactly once: the same response is both checked
 * for failure and stored, and the cURL handle is released on every path.
 *
 * @param string $url address sent as the Referer of the login request
 * @return $this
 * @throws \Exception with "<errno>: <error>" as message when the transfer fails
 */
public function loadUsingCurl($url)
{
    $cookieFile = $this->getPathToCookieFile();

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $this->getLoginUrl());
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_REFERER, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $this->getCurlOpt('header'));
    // The same file is used to read and to persist cookies.
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);
    curl_setopt($ch, CURLOPT_FRESH_CONNECT, 1);
    curl_setopt($ch, CURLOPT_USERAGENT, $this->getCurlOpt('userAgent'));
    curl_setopt($ch, CURLOPT_POST, 1);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $this->createPostRequestForCurl());

    $response = curl_exec($ch);

    if ($response === false) {
        // Read the error details before the handle is closed.
        $message = curl_errno($ch) . ': ' . curl_error($ch);
        curl_close($ch);

        throw new \Exception($message);
    }

    curl_close($ch);
    $this->_data = $response;

    Yii::info(Yii::t('app', 'Loading data page'));

    return $this;
}
// Application configuration excerpt; the "...." and "..." markers stand for
// entries that are omitted here.
//
// Registers app\components\ParserXenforo as the "parser" application component:
//   - host:     base URL of the forum; it ends with "index.php?" because the
//               component appends the login request path directly to it
//   - username: login name sent in the login request
//   - password: password sent in the login request
//   - curlOpt:  "userAgent" is the User-Agent string, "header" is the list of
//               extra HTTP request headers
// NOTE(review): the credentials look like demo values; presumably real ones
// should not be committed with the configuration - confirm.
.... 'components' => [ ... 'parser' => [ 'class' => 'app\components\ParserXenforo', 'host' => 'http://9af5766eb2759a49.demo-xenforo.com/130/index.php?', 'username' => 'admin', 'password' => 'admin', 'curlOpt' => [ 'userAgent' => 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36', 'header' => [ 'Accept: text/html, application/xml;q=0.9, application/xhtml+xml, image/png, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1', 'Accept-Language: en-US,en;q=0.8,ru;q=0.6,uk;q=0.4', 'Accept-Charset: Windows-1251, utf-8, *;q=0.1', 'Accept-Encoding: deflate, identity, *;q=0', ] ] ], ... ], ....
// Thread page handed to the parser component.
$urlThread = 'http://9af5766eb2759a49.demo-xenforo.com/130/index.php?threads/some-thread.1/';

// Fetch the "parser" application component first, then trigger the load.
$parser = Yii::$app->parser;

/** @var \app\components\ParserXenforo $dataParse */
$dataParse = $parser->loadUsingCurl($urlThread);
/**
 * Builds a DOMDocument from the previously loaded page data.
 *
 * Markup problems reported by libxml are collected internally instead of
 * being raised as PHP warnings; they are discarded afterwards and the
 * libxml error-handling mode that was active before the call is restored.
 * Missing or empty data is treated as a load failure without calling
 * DOMDocument::loadHTML().
 *
 * @return $this
 */
public function createDomDocument()
{
    $this->_dom = new \DOMDocument();

    // Remember the caller's setting so it can be restored, not overwritten.
    $previousSetting = libxml_use_internal_errors(true);

    $isLoaded = is_string($this->_data)
        && $this->_data !== ''
        && $this->_dom->loadHTML($this->_data);

    // Drop the buffered parse errors so they do not pile up across calls.
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    if ($isLoaded) {
        Yii::info(Yii::t('app', 'Create DomDocument'));
    } else {
        Yii::info(Yii::t('app', 'An error occurred when creating an object of class DOMDocument'));
    }

    return $this;
}
/**
 * Creates the XPath helper for the current DOM document.
 *
 * @return $this
 */
public function createDomXpath()
{
    $xpath = new \DOMXPath($this->_dom);
    $this->_xpath = $xpath;

    Yii::info(Yii::t('app', 'Create DomXpath'));

    return $this;
}
/**
 * Extracts the title from the first <h1> element of the document.
 *
 * When no <h1> is found the error is logged and the title is left
 * untouched, matching the behaviour of the other parse methods.
 *
 * @return $this
 */
public function parseTitle()
{
    $xpathQuery = '*//h1';
    $nodes = $this->_xpath->query($xpathQuery, $this->_dom);

    if ($nodes->length === 0) {
        Yii::info(Yii::t('app', 'Error parse title'));

        // Stop here: item(0) would be null and success must not be logged.
        return $this;
    }

    $this->_title = $nodes->item(0)->nodeValue;
    Yii::info(Yii::t('app', 'Parse title'));

    return $this;
}
/**
 * Reads the "data-time" attribute of the first <abbr> found under the
 * link inside the paragraph with id "pageDescription".
 *
 * When nothing matches, the error is logged and the timestamp stays unset.
 *
 * @return $this
 */
public function parseTimestamp()
{
    $matches = $this->_xpath->query('*//p[@id="pageDescription"]/a/abbr', $this->_dom);

    if ($matches->length !== 0) {
        // Timestamp value as stored in the attribute.
        $this->_timestamp = $matches->item(0)->getAttribute('data-time');
        Yii::info(Yii::t('app', 'Parse timestamp'));
    } else {
        Yii::info(Yii::t('app', 'Error parse timestamp'));
    }

    return $this;
}
/**
 * Takes the text of the first <blockquote> whose class attribute is
 * exactly "messageText ugc baseHtml" as the content.
 *
 * When nothing matches, the error is logged and the content stays unset.
 *
 * @return $this
 */
public function parseContent()
{
    $found = $this->_xpath->query('*//blockquote[@class="messageText ugc baseHtml"]', $this->_dom);
    $hasMatch = $found->length !== 0;

    if ($hasMatch) {
        $firstNode = $found->item(0);
        $this->_content = $firstNode->nodeValue;
    }

    Yii::info(Yii::t('app', $hasMatch ? 'Parse content' : 'Error parse content'));

    return $this;
}
/**
 * Finishes the parse chain and logs whether every field was obtained.
 *
 * All three parsed values (title, timestamp, content) must be set for
 * the run to be reported as complete.
 *
 * @return \app\components\ParserXenforo
 */
public function endParse()
{
    if (isset($this->_title, $this->_timestamp, $this->_content)) {
        Yii::info(Yii::t('app', 'End parse'));
    } else {
        Yii::info(Yii::t('app', 'Some data were not received'));
    }

    return $this;
}

/**
 * @return string title
 */
public function getTitle()
{
    return $this->_title;
}

/**
 * @return int timestamp
 */
public function getTimestamp()
{
    return $this->_timestamp;
}

/**
 * @return string content
 */
public function getContent()
{
    return $this->_content;
}
// Thread page handed to the parser component.
$urlThread = 'http://9af5766eb2759a49.demo-xenforo.com/130/index.php?threads/some-thread.1/';

// Every step returns the component itself, so the steps are run one by one
// on the same instance instead of being chained.
$parser = Yii::$app->parser;
$parser->loadUsingCurl($urlThread);
$parser->createDomDocument();
$parser->createDomXpath();
$parser->parseTitle();
$parser->parseTimestamp();
$parser->parseContent();

/** @var \app\components\ParserXenforo $dataParse */
$dataParse = $parser->endParse();

return $this->render('index', ['data' => $dataParse]);
<?php

use yii\helpers\Html;

/**
 * Renders the parsed title, creation time and content.
 *
 * The title and content originate from a remote page, so they are
 * HTML-encoded before being written into the markup.
 *
 * @var yii\web\View $this
 * @var \app\components\ParserXenforo $data
 */

$this->title = 'My Yii Application';
?>
<div class="site-index">
    <h1><?= Html::encode($data->title) ?></h1>
    <?php // The timestamp is cast to int because it may be a string or unset. ?>
    <p>Created At: <?= date('Ymd H:i:s', (int) $data->timestamp) ?></p>
    <p><?= Html::encode($data->content) ?></p>
</div>
Source: https://habr.com/ru/post/216227/
All Articles