<table> [+content+] </table>
<tr> <td>[+0+]</td> <td>[+1+]</td> <td>[+2+]</td> <td>[+3+]</td> </tr>
<?php
/**
 * MODX snippet "parser": downloads a remote page, pulls the table markup out
 * of it and re-renders the rows through two chunks.
 *
 * Parameters (expected as local variables set from the snippet call):
 *   $url      - address of the page to fetch (required).
 *   $tplInner - chunk rendering one row; the row's cells are passed as a
 *               numerically indexed array, i.e. placeholders [+0+], [+1+], ...
 *               Defaults to 'parserTplInner'.
 *   $tplOuter - chunk wrapping all rendered rows, passed as [+content+].
 *               Defaults to 'parserTplOuter'.
 *   $count    - number of data rows to show, clamped to 1..100 (default 6).
 *
 * Returns false when no URL is given and '' when nothing usable could be
 * fetched or parsed; otherwise echoes the rendered table.
 */

// Without an address there is nothing to do.
// NOTE(review): the URL is handed to cURL as-is. That is acceptable only while
// the snippet call is written by a trusted site author; validate the scheme
// and host here if the value can ever come from user input.
if (empty($url)) {
    return false;
}

// Default chunk names for the snippet.
$tplInner = empty($tplInner) ? 'parserTplInner' : $tplInner;
$tplOuter = empty($tplOuter) ? 'parserTplOuter' : $tplOuter;

// Limit the number of records: default 6, never below 1 or above 100.
$limit = (empty($count) || !is_numeric($count)) ? 6 : (int) $count;
$limit = max(1, min(100, $limit));

// Fetch the page body into a variable.
$ch = curl_init();
if ($ch === false) {
    return '';
}
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
$html = curl_exec($ch);
curl_close($ch);

// curl_exec() yields false on failure. A response shorter than 100 characters
// is also rejected: the bare HTML skeleton alone is roughly that long, so a
// shorter answer cannot contain a useful table.
if (!is_string($html) || mb_strlen($html) < 100) {
    return '';
}

// Capture everything between the first opening <table ...> tag and the last
// closing </table> tag (the quantifier is greedy on purpose, so the rows of
// every table on the page are collected). Attributes on the tag are optional.
// Parsing tends to be unique per target site; adapt this pattern as needed.
$pattern = '/<table\b[^>]*>([\s\S]+)<\/table>/i';
if (preg_match($pattern, $html, $matches) !== 1) {
    return '';
}

// Turn the markup into rows of plain-text cells: split on the closing row tag,
// mark every cell boundary with a service separator, strip the remaining tags
// and finally split on that separator.
$separator = '|==|';
$table = array();
foreach (preg_split('/<\/tr\s*>/i', $matches[1]) as $rowHtml) {
    $rowHtml = preg_replace('/<\/t[dh]\s*>\s*<t[dh]\b/i', $separator . '<td', $rowHtml);
    $cells = array_map('trim', explode($separator, strip_tags($rowHtml)));

    // Skip rows without any text; this includes the fragment that follows the
    // last closing row tag.
    if (implode('', $cells) === '') {
        continue;
    }
    $table[] = $cells;
}

if (empty($table)) {
    return '';
}

// Element 0 holds the table headings, so the header plus $limit data rows are
// rendered. If the headings are not wanted, drop $table[0] before this point.
$rows = '';
foreach (array_slice($table, 0, $limit + 1) as $row) {
    $rows .= $modx->parseChunk($tplInner, $row, '[+', '+]');
}

// Wrap the rendered rows in the outer chunk and output the result.
echo $modx->parseChunk($tplOuter, array('content' => $rows), '[+', '+]');
?>
[[parser?tplInner=`parserTplInner` &tplOuter=`parserTplOuter` &url=`http://example.com` &count=`10`]]
Source: https://habr.com/ru/post/93905/
All Articles