MJ â–˛0.21
, COKE â–Ľ3.53
?Sr::Listing
. First we need to automate the authorization process.auto_configure
application. require 'socksify' require 'socksify/http' module Sr class TorError < RuntimeError; end class Tor # Loads the torproject test page and tests for the success message. # Just to be sure. def self.test_tor? uri = 'https://check.torproject.org/?lang=en-US' begin page = Net::HTTP.get(URI.parse(uri)) return true if page.match(/browser is configured to use Tor/i) rescue ; end false end # Our use case has the Tor SOCKS5 proxy running locally. On unix, we use # `lsof` to see the ports `tor` is listening on. def self.find_tor_ports p = `lsof -i tcp | grep "^tor" | grep "\(LISTEN\)"` p.each_line.map do |l| m = l.match(/localhost:([0-9]+) /) m[1].to_i end end # Configures future connections to use the Tor proxy, or raises TorError def self.auto_configure! return true if @configured TCPSocket::socks_server = 'localhost' ports = find_tor_ports ports.each do |p| TCPSocket::socks_port = p if test_tor? @configured = true return true end end TCPSocket::socks_server = nil TCPSocket::socks_port = nil fail TorError, "SOCKS5 connection failed; ports: #{ports}" end end end
.jpg
. It was very sad, believe me. # Basic image processing gets us to a black and white image # with most background removed def remove_background(im) im = im.equalize im = im.threshold(Magick::MaxRGB * 0.09) # the above processing leaves a black border. Remove it. im = im.trim '#000' im end
# Consider a pixel "black enough"? In a grayscale sense. def black?(p) return p.intensity == 0 || (Magick::MaxRGB.to_f / p.intensity) < 0.5 end # True iff [x,y] is a valid pixel coordinate in the image def in_bounds?(im, x, y) return x >= 0 && y >= 0 && x < im.columns && y < im.rows end # Returns new image with single-pixel "islands" removed, # see: Conway's game of life. def despeckle(im) xydeltas = [[-1, -1], [0, -1], [+1, -1], [-1, 0], [+1, 0], [-1, +1], [0, +1], [+1, +1]] j = im.dup j.each_pixel do |p, x, y| if black?(p) island = true xydeltas.each do |dx2, dy2| if in_bounds?(j, x + dx2, y + dy2) && black?(j.pixel_color(x + dx2, y + dy2)) island = false break end end im = im.color_point(x, y, '#fff') if island end end im end
# returns true if column "x" is blank (non-black) def blank_column?(im, x) (0 ... im.rows).each do |y| return false if black?(im.pixel_color(x, y)) end true end # finds columns of white, and splits the image into characters, yielding each def each_segmented_character(im) return enum_for(__method__, im) unless block_given? st = 0 x = 0 while x < im.columns # Zoom over to the first non-blank column x += 1 while x < im.columns && blank_column?(im, x) # That's now our starting point. st = x # Zoom over to the next blank column, or end of the image. x += 1 while x < im.columns && (!blank_column?(im, x) || (x - st < 2)) # slivers smaller than this can't possibly work: it's noise. if x - st >= 4 # The crop/trim here also removes vertical whitespace, which puts the # resulting bitmap into its minimal bounding box. yield im.crop(st, 0, x - st, im.rows).trim('#fff') end end end
Ai4r::Positronic
not always available, I decided to use a neural network. require 'ai4r' require 'RMagick' module Sr class Brain def initialize @keys = *(('a'..'z').to_a + ('0'..'9').to_a) @ai = Ai4r::NeuralNetwork::Backpropagation.new([GRID_SIZE * GRID_SIZE, @keys.size]) end # Returns a flat array of 0 or 1 from the image data, suitable for # feeding into the neural network def to_data(image) # New image of our actual grid size, then paste it over padded = Magick::Image.new(GRID_SIZE, GRID_SIZE) padded = padded.composite(image, Magick::NorthWestGravity, Magick::MultiplyCompositeOp) padded.get_pixels(0, 0, padded.columns, padded.rows).map do |p| ImageProcessor.black?(p) ? 1 : 0 end end # Feed this a positive example, eg, train('a', image) def train(char, image) outputs = [0] * @keys.length outputs[ @keys.index(char) ] = 1.0 @ai.train(to_data(image), outputs) end # Return all guesses, eg, {'a' => 0.01, 'b' => '0.2', ...} def classify_all(image) results = @ai.eval(to_data(image)) r = {} @keys.each.with_index do |v, i| r[v] = results[i] end r end # Returns best guess def classify(image) res = @ai.eval(to_data(image)) @keys[res.index(res.max)] end end end
cat /usr/share/dict/words *.txt | tr AZ az | grep -v '[^az]' \ | cut -c1-5 | grep '...' | sort | uniq > dict5.txt
dict5.txt
contained all the possible options that the captcha could contain. # Returns the "word" and "number" part of a captcha separately. # "word" takes the longest possible match def split_word(s) s.match(/(.+?)?(\d+)?\z/.to_a.last(2) rescue [nil, nil] end def weird_word?(s) w, d = split_word(s) # nothing matched? return true if w.nil? || d.nil? # Digit in word part?, Too long? return true if w.match /\d/ || w.size > 5 # Too many digits? return true if d.size > 3 # Yay return false end def in_dict?(w) return dict.bsearch { |p| p >= w } == w end
# az English text letter frequency, according to Wikipedia LETTER_FREQ = { a: 0.08167, b: 0.01492, c: 0.02782, d: 0.04253, e: 0.12702, f: 0.02228, g: 0.02015, h: 0.06094, i: 0.06966, j: 0.00153, k: 0.00772, l: 0.04025, m: 0.02406, n: 0.06749, o: 0.07507, p: 0.01929, q: 0.00095, r: 0.05987, s: 0.06327, t: 0.09056, u: 0.02758, v: 0.00978, w: 0.02360, x: 0.00150, y: 0.01974, z: 0.00074 }
# This finds every dictionary entry that is a single replacement away from # word. It returns in a clever priority: it tries to replace digits first, # then the alphabet, in z..e (frequency) order. As we're just focusing on the # "word" part, "9" is most definitely a mistake, and "z" is more likely a # mistake than "e". def edit1(word) # Inverse frequency, "zq...e" letter_freq = LETTER_FREQ.sort_by { |k, v| v }.map(&:first).join # Total replacement priority: 0..9zq..e replacement_priority = ('0'..'9').to_a.join + letter_freq # Generate splits, tagged with the priority, then sort them so # the splits on least-frequent english characters get processed first splits = word.each_char.with_index.map do |c, i| # Replace what we're looking for with a space w = word.dup; w[i] = ' ' [replacement_priority.index(c), w] end splits.sort_by!{|k,v| k}.map!(&:last) # Keep up with results so we don't end up with duplicates yielded = [] splits.each do |w| letter_freq.each_char do |c| candidate = w.sub(' ', c) next if yielded.include?(candidate) if in_dict?(candidate) yielded.push(candidate) yield candidate end end end end
Source: https://habr.com/ru/post/241145/
All Articles