📜 ⬆️ ⬇️

Writing a parser in Java + MySQL

I recently came across a post on Habr about a database of domain names with e-mail addresses. I decided to write a parser to quietly download the whole database. But since the service went down very quickly because of the Habr effect (or maybe the admins fixed it, who knows), I looked further and found a plain-text list of domains in the .RU zone. I decided to parse it using whois on nic.ru. But nic.ru runs a script that gradually throttles draining the database from a single IP address. The way out is to use a proxy list. And, being too stingy to buy proxy lists, I decided to write two scripts in Java:
1. Parses samair.ru/proxy and saves the proxy list into MySQL.
2. Goes through the database and checks the timeout (latency) of the collected proxies.


Database

Screenshot of the table structure in phpMyAdmin

So, first, the parser.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;


public class ProxyHunter {

    /** Last listing page to scrape; pages are numbered from 1. */
    private static final int LAST_PAGE = 50;

    /** Opening tag of the script whose inline body holds the port substitution table. */
    private static final String SCRIPT_OPEN =
            "<script src=\"http://samair.ru:81/js/m.js\" type=\"text/javascript\">";
    /** Marker that ends the substitution table. */
    private static final String SCRIPT_CLOSE = "</script></head>";
    /** Start of a proxy row. */
    private static final String ROW_OPEN = "<tr><td>";
    /** Boundary between two cells of a row. */
    private static final String CELL_BREAK = "</td><td>";
    /** End of a proxy row. */
    private static final String ROW_CLOSE = "</td></tr>";
    /** Marker that follows the host inside the first cell. */
    private static final String HOST_END = "<script type=\"text/javascript\">";
    /** Start of the obfuscated port expression. */
    private static final String PORT_OPEN = ">document.write(\":\"+";
    /** End of the obfuscated port expression. */
    private static final String PORT_CLOSE = ")</script>";

    /**
     * Scrapes the samair.ru proxy listing pages and stores every proxy that is
     * not yet present in the {@code proxies} table.
     *
     * @param args ignored
     * @throws ClassNotFoundException if the MySQL JDBC driver is not on the classpath
     * @throws SQLException if connecting to or querying the database fails
     * @throws IOException if a page cannot be downloaded or has no substitution table
     */
    public static void main(String[] args) throws ClassNotFoundException, SQLException, IOException {
        Class.forName("com.mysql.jdbc.Driver");
        // 192.168.1.7 - database host (e.g. localhost); database - schema name;
        // username, password - MySQL credentials.
        // PreparedStatement is fully qualified so the file's import list need not change.
        try (Connection conn = DriverManager.getConnection(
                     "jdbc:mysql://192.168.1.7:3306/database", "username", "password");
             java.sql.PreparedStatement lookup = conn.prepareStatement(
                     "select host, port from proxies where host = ? and port = ?");
             java.sql.PreparedStatement insert = conn.prepareStatement(
                     "INSERT INTO proxies (host, port, anon_level, country) VALUES (?, ?, ?, ?)")) {
            for (int n = 1; n <= LAST_PAGE; n++) {
                System.out.println("Starting page: " + n);
                // Page numbers are zero-padded to two digits; the scheme is required by URL.
                URL page = new URL("http://www.samair.ru/proxy/ip-address-" + (n < 10 ? "0" : "") + n + ".htm");
                String text = fetchPage(page);
                String[] replacements = readReplacements(text);

                int cursor = text.indexOf(ROW_OPEN);
                while (cursor != -1) {
                    cursor += ROW_OPEN.length();
                    String host = text.substring(cursor, text.indexOf(HOST_END, cursor));
                    String expression = text.substring(
                            text.indexOf(PORT_OPEN, cursor) + PORT_OPEN.length(),
                            text.indexOf(PORT_CLOSE, cursor));
                    String port = decodePort(expression, replacements);

                    // Second cell: anonymity level.
                    cursor = text.indexOf(CELL_BREAK, cursor) + CELL_BREAK.length();
                    String anonLevel = text.substring(cursor, text.indexOf(CELL_BREAK, cursor));
                    // The third cell is skipped; the fourth (last) cell is the country.
                    cursor = text.indexOf(CELL_BREAK, cursor) + CELL_BREAK.length();
                    cursor = text.indexOf(CELL_BREAK, cursor) + CELL_BREAK.length();
                    String country = text.substring(cursor, text.indexOf(ROW_CLOSE, cursor));

                    storeIfNew(lookup, insert, host, port, anonLevel, country);
                    cursor = text.indexOf(ROW_OPEN, cursor);
                }
            }
        }
    }

    /** Downloads a page with a GET request and returns its lines joined without separators. */
    private static String fetchPage(URL page) throws IOException {
        HttpURLConnection urlconn = (HttpURLConnection) page.openConnection();
        try {
            urlconn.setRequestMethod("GET");
            urlconn.connect();
            StringBuilder text = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(urlconn.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    text.append(line);
                }
            }
            return text.toString();
        } finally {
            urlconn.disconnect();
        }
    }

    /** Extracts the ';'-separated "name=value" substitution entries from the page head. */
    private static String[] readReplacements(String text) throws IOException {
        int open = text.indexOf(SCRIPT_OPEN);
        int close = text.indexOf(SCRIPT_CLOSE);
        if (open == -1 || close < open + SCRIPT_OPEN.length()) {
            throw new IOException("Port substitution script not found on page");
        }
        return text.substring(open + SCRIPT_OPEN.length(), close).split(";");
    }

    /** Drops the '+' operators from a port expression and substitutes every known name. */
    private static String decodePort(String expression, String[] replacements) {
        String port = removeChar(expression, '+');
        for (String replacement : replacements) {
            String[] pair = replacement.split("=");
            // Skip malformed entries; names are literal text, not regular expressions.
            if (pair.length >= 2 && !pair[0].isEmpty()) {
                port = port.replace(pair[0], pair[1]);
            }
        }
        return port;
    }

    /** Inserts the proxy unless a row with the same host and port already exists. */
    private static void storeIfNew(java.sql.PreparedStatement lookup, java.sql.PreparedStatement insert,
            String host, String port, String anonLevel, String country) throws SQLException {
        lookup.setString(1, host);
        lookup.setString(2, port);
        boolean known;
        try (ResultSet rs = lookup.executeQuery()) {
            known = rs.next();
        }
        if (!known) {
            insert.setString(1, host);
            insert.setString(2, port);
            insert.setString(3, anonLevel);
            insert.setString(4, country);
            insert.executeUpdate();
            System.out.println("Added: " + host + ":" + port);
        }
    }

    /**
     * Returns a copy of {@code s} with every occurrence of {@code c} removed.
     *
     * @param s the source string; must not be {@code null}
     * @param c the character to drop
     * @return {@code s} without any {@code c} characters
     */
    public static String removeChar(String s, char c) {
        StringBuilder r = new StringBuilder(s.length());
        for (int i = 0; i < s.length(); i++) {
            char current = s.charAt(i);
            if (current != c) {
                r.append(current);
            }
        }
        return r.toString();
    }

}


And the checker itself
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.ProtocolException;
import java.net.URL;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;


public class ProxyHunter {

    /** Last listing page to scrape; pages are numbered from 1. */
    private static final int LAST_PAGE = 100;

    /** Opening tag of the script whose inline body holds the port substitution table. */
    private static final String SCRIPT_OPEN =
            "<script src=\"http://samair.ru:81/js/m.js\" type=\"text/javascript\">";
    /** Marker that ends the substitution table. */
    private static final String SCRIPT_CLOSE = "</script></head>";
    /** Start of a proxy row. */
    private static final String ROW_OPEN = "<tr><td>";
    /** Boundary between two cells of a row. */
    private static final String CELL_BREAK = "</td><td>";
    /** End of a proxy row. */
    private static final String ROW_CLOSE = "</td></tr>";
    /** Marker that follows the host inside the first cell of a scripted row. */
    private static final String HOST_END = "<script type=\"text/javascript\">";
    /** Start of the obfuscated port expression. */
    private static final String PORT_OPEN = ">document.write(\":\"+";
    /** End of the obfuscated port expression. */
    private static final String PORT_CLOSE = ")</script>";

    /**
     * Scrapes the samair.ru proxy listing pages and stores every proxy that is
     * not yet present in the {@code proxies} table. A row may carry its port
     * either as an obfuscated javascript expression or as plain "host:port" text.
     *
     * @param args ignored
     * @throws ClassNotFoundException if the MySQL JDBC driver is not on the classpath
     * @throws SQLException if connecting to or querying the database fails
     * @throws IOException if a page cannot be downloaded or has no substitution table
     */
    public static void main(String[] args) throws ClassNotFoundException, SQLException, IOException {
        Class.forName("com.mysql.jdbc.Driver");
        // 192.168.1.7 - database host (e.g. localhost); database - schema name;
        // username, password - MySQL credentials.
        // PreparedStatement is fully qualified so the file's import list need not change.
        try (Connection conn = DriverManager.getConnection(
                     "jdbc:mysql://192.168.1.7:3306/database", "username", "password");
             java.sql.PreparedStatement lookup = conn.prepareStatement(
                     "select host, port from proxies where host = ? and port = ?");
             java.sql.PreparedStatement insert = conn.prepareStatement(
                     "INSERT INTO proxies (host, port, anon_level, country) VALUES (?, ?, ?, ?)")) {
            for (int n = 1; n <= LAST_PAGE; n++) {
                System.out.println("Starting page: " + n);
                // Page numbers are zero-padded to two digits; the scheme is required by URL.
                URL page = new URL("http://www.samair.ru/proxy/ip-address-" + (n < 10 ? "0" : "") + n + ".htm");
                String text = fetchPage(page);
                String[] replacements = readReplacements(text);

                int cursor = text.indexOf(ROW_OPEN);
                while (cursor != -1) {
                    cursor += ROW_OPEN.length();
                    String host;
                    String port;
                    // Decide per row: the javascript port marker must sit inside this
                    // row's first cell, not merely somewhere later on the page.
                    int firstCellEnd = text.indexOf(CELL_BREAK, cursor);
                    int portOpen = text.indexOf(PORT_OPEN, cursor);
                    boolean scripted = portOpen != -1 && (firstCellEnd == -1 || portOpen < firstCellEnd);
                    if (scripted) {
                        // Port is written by javascript and must be decoded.
                        host = text.substring(cursor, text.indexOf(HOST_END, cursor));
                        String expression = text.substring(
                                portOpen + PORT_OPEN.length(), text.indexOf(PORT_CLOSE, cursor));
                        port = decodePort(expression, replacements);
                    } else {
                        // Plain "host:port" text.
                        int colon = text.indexOf(":", cursor);
                        host = text.substring(cursor, colon);
                        port = text.substring(colon + 1, text.indexOf(CELL_BREAK, cursor));
                    }

                    // Second cell: anonymity level.
                    cursor = text.indexOf(CELL_BREAK, cursor) + CELL_BREAK.length();
                    String anonLevel = text.substring(cursor, text.indexOf(CELL_BREAK, cursor));
                    // The third cell is skipped; the fourth (last) cell is the country.
                    cursor = text.indexOf(CELL_BREAK, cursor) + CELL_BREAK.length();
                    cursor = text.indexOf(CELL_BREAK, cursor) + CELL_BREAK.length();
                    String country = text.substring(cursor, text.indexOf(ROW_CLOSE, cursor));

                    storeIfNew(lookup, insert, host, port, anonLevel, country);
                    cursor = text.indexOf(ROW_OPEN, cursor);
                }
            }
        }
    }

    /** Downloads a page with a GET request and returns its lines joined without separators. */
    private static String fetchPage(URL page) throws IOException {
        HttpURLConnection urlconn = (HttpURLConnection) page.openConnection();
        try {
            urlconn.setRequestMethod("GET");
            urlconn.connect();
            StringBuilder text = new StringBuilder();
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(urlconn.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    text.append(line);
                }
            }
            return text.toString();
        } finally {
            urlconn.disconnect();
        }
    }

    /** Extracts the ';'-separated "name=value" substitution entries from the page head. */
    private static String[] readReplacements(String text) throws IOException {
        int open = text.indexOf(SCRIPT_OPEN);
        int close = text.indexOf(SCRIPT_CLOSE);
        if (open == -1 || close < open + SCRIPT_OPEN.length()) {
            throw new IOException("Port substitution script not found on page");
        }
        return text.substring(open + SCRIPT_OPEN.length(), close).split(";");
    }

    /** Drops the '+' operators from a port expression and substitutes every known name. */
    private static String decodePort(String expression, String[] replacements) {
        String port = removeChar(expression, '+');
        for (String replacement : replacements) {
            String[] pair = replacement.split("=");
            // Skip malformed entries; names are literal text, not regular expressions.
            if (pair.length >= 2 && !pair[0].isEmpty()) {
                port = port.replace(pair[0], pair[1]);
            }
        }
        return port;
    }

    /** Inserts the proxy unless a row with the same host and port already exists. */
    private static void storeIfNew(java.sql.PreparedStatement lookup, java.sql.PreparedStatement insert,
            String host, String port, String anonLevel, String country) throws SQLException {
        lookup.setString(1, host);
        lookup.setString(2, port);
        boolean known;
        try (ResultSet rs = lookup.executeQuery()) {
            known = rs.next();
        }
        if (!known) {
            insert.setString(1, host);
            insert.setString(2, port);
            insert.setString(3, anonLevel);
            insert.setString(4, country);
            insert.executeUpdate();
            System.out.println("Added: " + host + ":" + port);
        }
    }

    /**
     * Returns a copy of {@code s} with every occurrence of {@code c} removed.
     *
     * @param s the source string; must not be {@code null}
     * @param c the character to drop
     * @return {@code s} without any {@code c} characters
     */
    public static String removeChar(String s, char c) {
        StringBuilder r = new StringBuilder(s.length());
        for (int i = 0; i < s.length(); i++) {
            char current = s.charAt(i);
            if (current != c) {
                r.append(current);
            }
        }
        return r.toString();
    }

}


In general, I am tormented by doubts about whether implementing this with an array of threads is correct. It seems that Java has something special for such purposes, but I implemented the first thing that came to mind - an array of threads.
And please don't scold me for reinventing the wheel. The goal here was just fun plus working with MySQL. For cleaner console output, you can comment out all the printStackTrace() calls.
')
Result

A piece of the checked database. A latency of -1 means a dead proxy.

PS
The MySQL driver can be downloaded here: www.mysql.com/products/connector
Select the JDBC Driver for MySQL (Connector/J) there. Unzip the archive, take the mysql-connector-java-5.1.10-bin.jar file out of it and drop it into the project folder. Then in Eclipse, right-click the project -> Properties -> Java Build Path -> Libraries -> Add JARs and add it there.

This is how it should look.

PPS
The code was exported from Eclipse to HTML using the Java2Html converter

© @nixan

Source: https://habr.com/ru/post/80813/


All Articles