NB stopped asking, apparently afraid of annoying a clearly more experienced interlocutor.
NB: - ?
GURU : - «» . . . , ...
NB: - , ?
GURU : - google yandex , . . , .
NB: - ?
GURU : - ?… ...
For "use immediately", it is not worth reading it thoughtfully. Code for sure.
git, docker, docker-compose
git, php-common, php-cli, php-curl, php-zip, php-memcached, composer
If you have any questions at this point, I suggest that you spend 15 minutes reading the material in full.
# Infrastructure host quick start.
# NOTE(review): the original comment reads "root-" - presumably "run as root"; confirm.
# TCP ports named by the original comment (presumably they must be free on this host):
# 11300, 11211, 4444, 5930, 8080, 8081, 8082, 8083

# Fetch the infrastructure repository and build/start its containers in the background.
git clone \
    https://oauth2:YRGzV8Ktx2ztoZg_oZZL@git.ituse.ru/deploy/esb-infrastructure.git
cd esb-infrastructure
docker-compose up --build -d

# NOTE(review): the original trailing comment mentions "3" and a web panel -
# presumably "wait about 3 minutes, then open the web panel"; confirm.
# Process host quick start: fetch the PHP project and install its dependencies.
git clone \
    https://oauth2:YRGzV8Ktx2ztoZg_oZZL@git.ituse.ru/deploy/clicker-noserver.git
cd clicker-noserver
composer update

# Create the settings file from the distributed template.
# NOTE(review): the original comment mentions "XXXXXXXX" - presumably placeholders
# in the settings file that must be replaced with real values; confirm.
mv app/settings.php.dist app/settings.php

# Start one queue listener per terminal tab (queues: curl, timezone, whoer).
gnome-terminal \
    --tab -e 'bash -c "php app/src/Process/noserver/singleProcess.php curl"' \
    --tab -e 'bash -c "php app/src/Process/noserver/singleProcess.php timezone"' \
    --tab -e 'bash -c "php app/src/Process/noserver/singleProcess.php whoer"'

# Hand the contents of log/list.proxy to the job-placing utility.
php app/src/Utils/givethejob.php ./log/list.proxy
Excuses and failures
All package code consists of three parts.
One of them is not mine (I note that this is a neat and beautiful code). The source of this code is packagist.org .
I wrote another one myself, tried to make it understandable and devoted about a week to this part of the code.
The rest is “a difficult historical legacy.” This part of the code has been created for quite a long time. Including in that period when I still did not have a great knack for programming.
This is exactly the reason for the location of the repositories on my GitLab and the packages on my Satis . For publication on GitHub.com and packagist.org, this code will require processing and more thorough documentation.
All parts of the code are open for unlimited use. Repositories and packages will be available "forever."
However, if you republish the code, I would be grateful if you included a link to me or to this article.
class esbTask // , { // immutable-; // "" ( ESB-), payload // // "" .... }
// json- esbTask { // ( esbTask) "_type":"App\\\\rebean\\\\payloads\\\\ESBtaskQueue", // "task":"task:queue@XXX.XXX.XXX.XXX:11300", // () "replyto":[ "othertask1:nextqueue1@yyy.XXX.XXX.XXX:11300", "othertask2:nextqueue2", "othertask3:nextqueue3", ], // () "onerror":[ "error:errorsstep@zzz.XXX.XXX.XXX:11300", "error:errorsstep1", "error:errorsstep2", "error:errorsstep3" ], // () "onstop":[ "stop:stopstep@kkk.XXX.XXX.XXX:11300", "stop:stopstep1", "stop:stopstep2", "stop:stopstep3" ], // "payload":{ .... }, // ... () "till":[ .... ], // ... () // // - (LINUX-TimeStamp) "since":[ 1540073089.8833, ], // - "points":1, // . "groupid":"" }
class nextStepWorker extends workerConstructor { // // esbTask // () // MQ- (beanstalkd) // (Memcached) // (MySQL) // : - ( - ); - stop-; - (log, event, mq) .... }
// app/src/Process/worker/curlChecker.php
// ....

/**
 * Worker step that probes a proxy with the curl command-line tool.
 *
 * It requests an IP/geo information URL through the proxy and keeps the
 * decoded JSON answer under the "info" key. An empty or undecodable answer
 * is reported by throwing an exception whose code is static::STATUS_STOP.
 */
class curlChecker extends nextStepWorker
{
    /** Default URL fetched through the proxy. */
    const PROXY_INFO = 'https://api.ipfy.me?format=json&geo=true';

    /** Default value for curl's -m (maximum time, seconds) option. */
    const PROXY_TIMEOUT = '40';

    /** Command template: timeout, proxy host, proxy port, URL to fetch. */
    const COMMAND = "curl -m %s -Lx http://%s:%s '%s'";

    /**
     * Runs the proxy check.
     *
     * @throws \Exception with code static::STATUS_STOP when the proxy returns no valid JSON
     */
    public function logic()
    {
        // Import the context entries as local variables; $host and $port are
        // expected to come from here, $curltimeout and $curlchecker are optional.
        extract($this->context());

        // Fall back to the class defaults when the context does not override them.
        $curltimeout = $curltimeout ?? self::PROXY_TIMEOUT;
        $curlchecker = $curlchecker ?? self::PROXY_INFO;

        // NOTE(review): the values are interpolated into a shell command unescaped.
        // If the payload can come from an untrusted source, escape them
        // (escapeshellarg) together with an adjusted COMMAND template.
        $line = sprintf(
            static::COMMAND,
            $curltimeout,
            $host,
            $port,
            $curlchecker
        );

        // exec() appends to an existing array, so collect the output in a fresh
        // one: extract() above may already have defined $info from the context.
        $output = [];
        exec($line, $output);

        // Fixed: the original passed the bare constant `info` instead of the
        // variable holding the command output.
        $info = arrays::valid_json(implode('', $output));
        if (empty($info)) {
            throw new \Exception("Bad proxy: $host:$port!", static::STATUS_STOP);
        }

        // NOTE(review): presumably enrich() declares "info" as a new payload
        // field and sets() stores its value - confirm against nextStepWorker.
        $this->enrich(['info'])
            ->sets(compact('info'));
    }
}
// app/src/Process/worker/timeZone.php
# Refresh the package index, then install docker-compose.
sudo apt-get -y update
sudo apt-get -y install docker-compose
# Run from a directory where docker commands are allowed.
# NOTE(review): the original comment ends with "sudo -s"; it is unclear whether
# that was a command or a hint, so it is kept as a comment here:
# sudo -s

# Clone the infrastructure repository into ./panels and build/start its containers.
git clone \
    https://oauth2:YRGzV8Ktx2ztoZg_oZZL@git.ituse.ru/deploy/esb-infrastructure.git \
    panels
cd panels
docker-compose up --build -d
- XXX.XXX.XXX.XXX:11300 - beanstalkd
- XXX.XXX.XXX.XXX:11211 - Memcached
- XXX.XXX.XXX.XXX:4444 - Selenium Server
- XXX.XXX.XXX.XXX:5930 - VNC server to watch what is happening in Chrome
- XXX.XXX.XXX.XXX:8081 - Web panel for communication with Memcached (admin: pass)
- XXX.XXX.XXX.XXX:8082 - A web panel to communicate with beanstalkd
- XXX.XXX.XXX.XXX:8083 - Web panel for communication with VNC (password: secret)
- XXX.XXX.XXX.XXX:8080 - Shared web panel
# , docker-, # ..../panels/ # docker-compose ps # Name Command State Ports # ------------------------------------------------------------------------------------------------------------------------ # beanstalkd /usr/bin/beanstalkd Up 0.0.0.0:11300->11300/tcp # chrome start-cron Up 0.0.0.0:4444->4444/tcp, 0.0.0.0:5930->5900/tcp # memcached docker-entrypoint.sh memcached Up 0.0.0.0:11211->11211/tcp # nginx docker-php-entrypoint /sta ... Up 0.0.0.0:8443->443/tcp, 0.0.0.0:8080->80/tcp, # 0.0.0.0:8082->8082/tcp, 0.0.0.0:8083->8083/tcp, 9000/tcp # vnc /usr/bin/supervisord -c /e ... Up 0.0.0.0:8081->8081/tcp # chrome docker exec -ti chrome /bin/bash # docker-compose stop && docker rm $(docker ps -a -q)
// app/src/Process/worker/whoerChecker.php
// app/src/Chrome/proxyHelper.php
// ....
class proxyHelper extends sshDocker
{
    // ....

    /**
     * Builds a plugin zip archive in the cache directory (unless a file with
     * that name already exists there) and adds it to the list of known plugins.
     *
     * @param string $name  file name of the archive inside $this->cache
     * @param array  $files map of file name => file contents; only the base
     *                      name of each key is used inside the archive
     *
     * @return $this
     *
     * @throws \RuntimeException when the archive cannot be created or written
     */
    protected function buildPlugin(string $name, array $files)
    {
        $this->last = "$this->cache/$name";

        // An already existing archive is reused as is.
        if (!file_exists($this->last)) {
            $zip = new \ZipArchive();

            // open() returns true on success and an integer error code otherwise.
            $opened = $zip->open($this->last, \ZipArchive::CREATE | \ZipArchive::OVERWRITE);
            if ($opened !== true) {
                throw new \RuntimeException(
                    "Cannot create plugin archive {$this->last} (ZipArchive error code: $opened)"
                );
            }

            foreach ($files as $n => $data) {
                $zip->addFromString(basename($n), $data);
            }

            // The archive is only written to disk on close(), so its result matters.
            if (!$zip->close()) {
                throw new \RuntimeException("Cannot write plugin archive {$this->last}");
            }
        }

        // Register the archive once, even when it is requested repeatedly.
        $this->all[] = $this->last;
        $this->all = array_unique($this->all);

        return $this;
    }

    // ....
}
// app/chromePlugins/timeShift/content.js // var s = document.createElement('script'); // s.src = chrome.extension.getURL('timeshift.js'); // (document.head || document.documentElement).appendChild(s);
// app/chromePlugins/proxy/background.js var config = { mode: "fixed_servers", rules: { singleProxy: { scheme: "%scheme", host: "%proxy_host", port: parseInt(%proxy_port) }, bypassList: ["foobar.com"] } }; chrome.proxy.settings.set({value: config, scope: "regular"}, function () { }); function callbackFn(details) { return { authCredentials: { username: "%username", password: "%password" } }; } chrome.webRequest.onAuthRequired.addListener( callbackFn, {urls: [">all_urls<"]}, ['blocking'] );
// naming scheme for proxy plugin:
proxy-[address]-[port]-[protocol].zip
timeshift-["-" | ""]-[shift_in_minutes_from_GMT].zip
// app/src/Chrome/sshDocker.php // (DOCKER_HOST, DOCKER_USER, DOCKER_PASS) // : app/settings.php // .... class sshDocker { .... // . docker // . // : app/techs.php const EXEC_DOCKER = DOCKER_BIN_PATH . "/docker exec -i %s %s"; .... // sudo ( ), DOCKER_USER - protected function sudo(string $command, string $expect = '.*'){...} // Docker-, // - self::EXEC_DOCKER protected function execDocker(string $command, string $expect){...} .... }
// app/src/Chrome/proxyHelper.php
// ....
class proxyHelper extends sshDocker
{
    // ....

    /**
     * Named constructor: creates a helper and immediately runs setupPlugins() on it.
     *
     * @param string $docker  passed through to the constructor unchanged
     * @param mixed  $plugins passed through to the constructor unchanged
     *
     * @return mixed whatever setupPlugins() returns
     */
    public static function new(string $docker, $plugins)
    {
        $helper = new self($docker, $plugins);

        return $helper->setupPlugins();
    }

    // ....
}
composer require facebook/webdriver
// Page to open.
$url = "https://example.com/books/196/empire-v-povest-o-nastoyashem-sverhcheloveke";

// Selenium Server endpoint, built from the DOCKER_HOST constant.
$server = 'http://' . DOCKER_HOST . '/wd/hub';

// Chrome start-up options: switch off web notifications.
$options = new ChromeOptions();
$options->addArguments(['--disable-notifications']);

// Capabilities for a Chrome session carrying the options above.
$capabilities = DesiredCapabilities::chrome();
$capabilities->setCapability(ChromeOptions::CAPABILITY, $options);

// Open the session (third argument: 5000) and load the page.
$driver = RemoteWebDriver::create($server, $capabilities, 5000);
$page = $driver->get($url);
// app/src/Process/worker/whoerChecker.php .... class whoerChecker extends nextStepWorker { // : app/settings.php // URL Selenium Server const SELENIUM_SERVER = CHVM; // - const DOCKER_NAME = DOCKER_NAME; .... public function config() .... // : // $driver = RemoteWebDriver::create($server, $capabilities, 5000); $chrome = Chrome::driver( static::SELENIUM_SERVER, Chrome::capabilities(static::DOCKER_NAME, $plugins), 5000 ); .... } ....
// app/plugs.php

/**
 * Description of the Chrome plugins, keyed by plugin name.
 *
 * Each entry holds:
 *  - 'path':   directory with the plugin sources (under PROJECTPATH);
 *  - 'files':  names of the files that make up the plugin;
 *  - 'fields': map of "%placeholder" => key name.
 *              NOTE(review): presumably each placeholder found in the plugin
 *              files is replaced by the value stored under that key - confirm.
 */
const PLUGS = [
    'timeshift' => [
        'path' => PROJECTPATH . '/app/chromePlugins/timeShift',
        'files' => ['manifest.json', 'timeshift.js', 'content.js'],
        'fields' => ['%addsminutes' => 'timeshift']
    ],
    'proxy' => [
        'path' => PROJECTPATH . '/app/chromePlugins/proxy',
        'files' => ['manifest.json', 'background.js'],
        'fields' => [
            '%proxy_host' => 'host',
            '%proxy_port' => 'port',
            '%scheme' => 'scheme',
            '%username' => 'user',
            '%password' => 'pass'
        ]
    ]
];
// ....
// Load the page.
$url = 'https://__/_-_';
$page = $chrome->get($url);
// ....

// Locate one element by its absolute XPath.
$xpath = '/html[1]/body[1]/div[1]/div[1]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/strong[1]';
// Fixed: the original read `page->findElement(...)` without the `$` sigil.
$element = $page->findElement(WebDriverBy::xpath($xpath));
// ....

// Visible text of the element.
$text = $element->getText();

// Inner HTML of the element.
// Fixed: the original read `$element->>getAttribute(...)`, a syntax error.
$html = $element->getAttribute('innerHTML');
// ....
// app/src/Process/worker/whoerChecker.php
To finish the description of working with Selenium Server, I want to draw your attention to the fact that when this technology is used on an industrial scale (1000 - 3000 page openings), sessions with Selenium Server often end incorrectly. The browser window is left abandoned, and many such windows can accumulate.
iptables -t raw -I PREROUTING -p udp -m multiport --dports 3478,19302 -j DROP
// app/src/Process/noserver/singleProcess.php
//
// Command-line listener for one queue: waits for jobs on the queue named by the
// first argument and starts the matching worker script for every job received.
// Usage: php singleProcess.php <queue_name>

// Project settings.
include __DIR__ . '/../../../settings.php';

use App\ESB\pipeNcacheService;
use App\arrayNstring\queueDSN;
use App\arrayNstring\timeSpent;
use App\arrayNstring\progressString;

// Directory holding the worker scripts.
$path = __DIR__ . '/../worker';

// Known queue names are the keys of WORKERS; without an argument the last one is used.
$queues = array_keys(WORKERS);
$queue = $argv[1] ?? end($queues);
$queue = strtolower($queue);
if (!in_array($queue, $queues)) {
    die("php $argv[0] <queue_name>" . PHP_EOL);
}

// Idle indicator printed while no job is available.
$progress = new progressString("Listenning... Idle: ", 40, 20);

// Measures the idle time and, later, the time spent by a worker.
$stopwatch = timeSpent::start();

// Worker script and task name configured for this queue.
list($worker, $task) = WORKERS[$queue];
$procid = ['procid' => posix_getpid()];

// Connection description built from the task, the queue and QUEUE_SERVER.
$dsn = new queueDSN($task, $queue, ...QUEUE_SERVER);

// Service object that provides the queue pipe.
$pnc = new pipeNcacheService($dsn);
$pipe = $pnc->getPipe();

echo "Start listener for queue: $queue." . PHP_EOL;
echo "Press Ctrl-C to stop listener." . PHP_EOL;

// Fixed: $now is read in the catch branch, which can run before any job was
// received; define it up front (null is what the undefined variable evaluated
// to before, minus the warning).
$now = null;

// Endless loop; stopped with Ctrl-C.
while (true) {
    try {
        // Wait up to one second for a job.
        $job = $pipe->watch($queue)
            ->reserve(1);
        $now = new DateTime();

        // Job payload plus this listener's process id, JSON-encoded for the worker.
        $opts = json_encode($pipe->getPayload($job) + $procid);
        $pipe->delete($job);

        echo PHP_EOL . "Task recived at: " . $now->format('H:i:s') . " Starting worker: $worker. ";
        $stopwatch = timeSpent::start();

        // Fixed: exec() appends to an existing array, so start from an empty
        // one on every job; otherwise $out grows for the lifetime of the listener.
        // NOTE(review): $opts reaches the shell unescaped. escapeshellarg() looks
        // like the right call, but it changes what the worker receives in argv,
        // so confirm how the workers parse their argument before changing it.
        $out = [];
        exec("php $path/$worker $opts", $out);

        echo "Finished. Time spent: $stopwatch" . PHP_EOL;
        $stopwatch = timeSpent::start();
    } catch (Throwable $exception) {
        // Any failure in the block above (presumably including "no job arrived
        // within the timeout") only refreshes the idle indicator.
        echo $progress($stopwatch('%I:%S', null, $now));
    }
}
- app/queues.php contains the names of queues and tasks
- app/plugs.php contains the description of the Chrome plugins
- app/techs.php contains calculated constants
backup.php - saves queues to a file
clear.php - cleans the queues
exporter.php - exports address:port pairs from a file with a saved queue
givethejob.php - places tasks for the process (source: a file with address:port pairs); may exclude some addresses from the list
restore.php - restores the saved queue
// app/src/Process/worker/curlChecker.php
// ....

// Worker settings, keyed by curlChecker class constants.
$workerOptions = [
    // NOTE(review): presumably the name under which this worker is known - confirm.
    curlChecker::WORKER => 'curlchecker',
    // beanstalkd endpoint: the QUEUE_SERVER parts joined with ":".
    curlChecker::PIPE_HOSTPORT => implode(':', QUEUE_SERVER),
    // Memcached endpoint: the MEMCACHED parts joined with ":".
    curlChecker::CACHE_HOSTPORT => implode(':', MEMCACHED),
    // Path to the database configuration script.
    curlChecker::DB_SCRIPT => __DIR__ . '/../../../confdb.php',
    // NOTE(review): presumably the message reported when the worker starts - confirm.
    curlChecker::INFO_START => CURL_START,
    // NOTE(review): presumably the message reported when the worker ends - confirm.
    curlChecker::INFO_END => CURL_END,
    // NOTE(review): presumably payload keys appended to the end message - confirm.
    curlChecker::INFO_ADDS_END => ['host', 'port']
];

// NOTE(review): presumably the names of the methods the worker runs, in order - confirm.
$workerStages = ['setupworker', 'config', 'logic'];

$worker = new curlChecker($workerOptions, $workerStages);
// ....
# Infrastructure host: full installation.
# NOTE(review): the original comment reads "root-" - presumably "run as root"; confirm.
# TCP ports named by the original comment (presumably they must be free on this host):
# 11300, 11211, 4444, 5930, 8080, 8081, 8082, 8083

# Become root and install git, docker and docker-compose.
# NOTE(review): on Debian/Ubuntu the snap tooling is packaged as "snapd"; the
# "snap" package is an unrelated program - confirm which one was intended.
sudo -s
apt -y update
apt -y install git snap
snap install docker
apt -y install docker-compose

# Fetch the infrastructure repository into the home directory and
# build/start its containers in the background.
cd ~
git clone \
    https://oauth2:YRGzV8Ktx2ztoZg_oZZL@git.ituse.ru/deploy/esb-infrastructure.git
cd esb-infrastructure
docker-compose up --build -d

# NOTE(review): the original trailing comment mentions "3" and a web panel -
# presumably "wait about 3 minutes, then open the web panel"; confirm.
# Process host: full installation.

# Install git, the PHP command-line runtime with the needed extensions, and composer.
sudo apt -y update
sudo apt -y install git php-common php-cli php-curl php-zip php-memcached composer

# Fetch the PHP project into /var/www and install its dependencies.
cd /var/www
git clone \
    https://oauth2:YRGzV8Ktx2ztoZg_oZZL@git.ituse.ru/deploy/clicker-noserver.git
cd clicker-noserver
composer update

# Create the settings file from the distributed template.
# NOTE(review): the original comment mentions "XXXXXXXX" - presumably placeholders
# in the settings file that must be replaced with real values; confirm.
mv app/settings.php.dist app/settings.php

# Start one queue listener per terminal tab (queues: curl, timezone, whoer).
gnome-terminal \
    --tab -e 'bash -c "php app/src/Process/noserver/singleProcess.php curl"' \
    --tab -e 'bash -c "php app/src/Process/noserver/singleProcess.php timezone"' \
    --tab -e 'bash -c "php app/src/Process/noserver/singleProcess.php whoer"'

# Hand the contents of log/list.proxy to the job-placing utility.
php app/src/Utils/givethejob.php ./log/list.proxy
Source: https://habr.com/ru/post/432038/