/**
 * Event-emitting HTTP page downloader.
 *
 * Events emitted by load():
 *   'page'  (body, src) - the complete response body as a utf8 string and the
 *                         parsed URL object it was fetched from.
 *   'error' (err, src)  - the request failed; emitted only when at least one
 *                         'error' listener is registered, otherwise the
 *                         failure is just logged.
 *
 * @param {string} [sourceUrl] - accepted for backward compatibility; the
 *   constructor itself does not use it. Pass the URL to load() instead.
 */
var webDownloader = function (sourceUrl) {
  events.EventEmitter.call(this);

  /**
   * Starts a plain-HTTP GET for sourceUrl and emits 'page' once the whole
   * body has been received.
   *
   * @param {string} sourceUrl - absolute URL; the port defaults to 80.
   */
  this.load = function (sourceUrl) {
    var miner = this;
    var src = url.parse(sourceUrl);
    var options = {
      host: src.hostname,
      port: src.port || 80,
      // pathname/search may be missing on a parsed URL; never send "null".
      path: (src.pathname || '/') + (src.search || ''),
      method: 'GET'
    };

    sys.log('loading ' + src.href);

    // http.request replaces the removed http.createClient API and fills in
    // the Host header from the options on its own.
    var request = http.request(options, function (response) {
      var body = '';
      response.setEncoding('utf8');
      response.on('data', function (chunk) {
        body += chunk;
      });
      response.on('end', function () {
        miner.emit('page', body, src);
      });
    });

    // Without this handler a refused or failed connection is thrown as an
    // uncaught exception and takes the whole process down.
    request.on('error', function (err) {
      sys.log('failed to load ' + src.href + ': ' + err.message);
      // Emitting 'error' with no listener would itself throw, so only
      // forward the failure when somebody is listening for it.
      if (miner.listeners('error').length > 0) {
        miner.emit('error', err, src);
      }
    });

    request.end();
  };
};
sys.inherits(webDownloader, events.EventEmitter);
<br>
// This source code was highlighted with Source Code Highlighter.
/**
 * Event-emitting HTTP page downloader.
 *
 * Events emitted by load():
 *   'page'  (body, src) - the complete response body as a utf8 string and the
 *                         parsed URL object it was fetched from.
 *   'error' (err, src)  - the request failed; emitted only when at least one
 *                         'error' listener is registered, otherwise the
 *                         failure is just logged.
 *
 * @param {string} [sourceUrl] - accepted for backward compatibility; the
 *   constructor itself does not use it. Pass the URL to load() instead.
 */
var webDownloader = function (sourceUrl) {
  events.EventEmitter.call(this);

  /**
   * Starts a plain-HTTP GET for sourceUrl and emits 'page' once the whole
   * body has been received.
   *
   * @param {string} sourceUrl - absolute URL; the port defaults to 80.
   */
  this.load = function (sourceUrl) {
    var miner = this;
    var src = url.parse(sourceUrl);
    var options = {
      host: src.hostname,
      port: src.port || 80,
      // pathname/search may be missing on a parsed URL; never send "null".
      path: (src.pathname || '/') + (src.search || ''),
      method: 'GET'
    };

    sys.log('loading ' + src.href);

    // http.request replaces the removed http.createClient API and fills in
    // the Host header from the options on its own.
    var request = http.request(options, function (response) {
      var body = '';
      response.setEncoding('utf8');
      response.on('data', function (chunk) {
        body += chunk;
      });
      response.on('end', function () {
        miner.emit('page', body, src);
      });
    });

    // Without this handler a refused or failed connection is thrown as an
    // uncaught exception and takes the whole process down.
    request.on('error', function (err) {
      sys.log('failed to load ' + src.href + ': ' + err.message);
      // Emitting 'error' with no listener would itself throw, so only
      // forward the failure when somebody is listening for it.
      if (miner.listeners('error').length > 0) {
        miner.emit('error', err, src);
      }
    });

    request.end();
  };
};
sys.inherits(webDownloader, events.EventEmitter);
<br>
// This source code was highlighted with Source Code Highlighter.
/**
 * Event-emitting HTTP page downloader.
 *
 * Events emitted by load():
 *   'page'  (body, src) - the complete response body as a utf8 string and the
 *                         parsed URL object it was fetched from.
 *   'error' (err, src)  - the request failed; emitted only when at least one
 *                         'error' listener is registered, otherwise the
 *                         failure is just logged.
 *
 * @param {string} [sourceUrl] - accepted for backward compatibility; the
 *   constructor itself does not use it. Pass the URL to load() instead.
 */
var webDownloader = function (sourceUrl) {
  events.EventEmitter.call(this);

  /**
   * Starts a plain-HTTP GET for sourceUrl and emits 'page' once the whole
   * body has been received.
   *
   * @param {string} sourceUrl - absolute URL; the port defaults to 80.
   */
  this.load = function (sourceUrl) {
    var miner = this;
    var src = url.parse(sourceUrl);
    var options = {
      host: src.hostname,
      port: src.port || 80,
      // pathname/search may be missing on a parsed URL; never send "null".
      path: (src.pathname || '/') + (src.search || ''),
      method: 'GET'
    };

    sys.log('loading ' + src.href);

    // http.request replaces the removed http.createClient API and fills in
    // the Host header from the options on its own.
    var request = http.request(options, function (response) {
      var body = '';
      response.setEncoding('utf8');
      response.on('data', function (chunk) {
        body += chunk;
      });
      response.on('end', function () {
        miner.emit('page', body, src);
      });
    });

    // Without this handler a refused or failed connection is thrown as an
    // uncaught exception and takes the whole process down.
    request.on('error', function (err) {
      sys.log('failed to load ' + src.href + ': ' + err.message);
      // Emitting 'error' with no listener would itself throw, so only
      // forward the failure when somebody is listening for it.
      if (miner.listeners('error').length > 0) {
        miner.emit('error', err, src);
      }
    });

    request.end();
  };
};
sys.inherits(webDownloader, events.EventEmitter);
<br>
// This source code was highlighted with Source Code Highlighter.
// Shared downloader instance; every fetched page is handed to vcardSearch.
var loader = new webDownloader();
// vcardSearch is assigned further down the file, so at this point the
// variable is still undefined. Register a wrapper that looks the handler up
// when the event actually fires instead of capturing the undefined value.
loader.on('page', function () {
  return vcardSearch.apply(this, arguments);
});
/**
 * Scans a downloaded page for hCard-style class names and logs the result.
 *
 * The markup is parsed with Apricot, every element below the document body
 * is visited depth-first, and the tag-stripped, trimmed text of each element
 * carrying a known class is stored in vcard.Values under that class name
 * (a later element with the same class overwrites an earlier one).
 *
 * @param {string} body - page markup handed to Apricot.parse.
 * @param {Object} src - parsed URL of the page; only src.href is read, for
 *   the log messages.
 */
var vcardSearch = function (body, src) {
  sys.log('scaning ' + src.href);
  Apricot.parse(body, function (doc) {
    var vcardClasses = [
      // name properties
      'fn',
      'family-name', 'given-name', 'additional-name', 'honorific-prefix', 'honorific-suffix',
      'nickname',
      // address and contact properties
      'adr', 'contact',
      'email',
      'post-office-box', 'extended-address', 'street-address', 'locality', 'region', 'postal-code', 'country-name',
      'bday', 'logo', 'org', 'photo', 'tel'
    ];
    var tagPattern = /<\/?[^>]+(>|$)/g;
    var vcard = new vCard();

    var scanElement = function (el) {
      if (el == undefined) return;

      // className is only usable when it is a non-empty string; anything
      // else (missing, or an object) is skipped instead of throwing.
      if (typeof el.className === 'string' && el.className !== '') {
        // Class names may be separated by any whitespace, not just ' '.
        var classes = el.className.split(/\s+/);
        for (var n = 0; n < classes.length; n++) {
          if (vcardClasses.indexOf(classes[n]) < 0) continue;
          var rawText = el.text == undefined ? '' : String(el.text);
          // Strip tags first, then trim, so padding inside tags is removed.
          var value = rawText.replace(tagPattern, '').trim();
          if (value !== '') vcard.Values[classes[n]] = value;
        }
      }

      // Index-based walk: for...in would also visit non-index properties
      // of the child collection.
      var children = el.childNodes;
      if (children == undefined) return;
      for (var i = 0; i < children.length; i++) scanElement(children[i]);
    };

    scanElement(doc.document.body);

    if (!vcard.isEmpty()) {
      sys.log('vCard = ' + vcard.toString());
    } else {
      sys.log('no vCard found on ' + src.href);
    }
  });
};
<br>
// This source code was highlighted with Source Code Highlighter.
/**
 * Scans a downloaded page for hCard-style class names and logs the result.
 *
 * The markup is parsed with Apricot, every element below the document body
 * is visited depth-first, and the tag-stripped, trimmed text of each element
 * carrying a known class is stored in vcard.Values under that class name
 * (a later element with the same class overwrites an earlier one).
 *
 * @param {string} body - page markup handed to Apricot.parse.
 * @param {Object} src - parsed URL of the page; only src.href is read, for
 *   the log messages.
 */
var vcardSearch = function (body, src) {
  sys.log('scaning ' + src.href);
  Apricot.parse(body, function (doc) {
    var vcardClasses = [
      // name properties
      'fn',
      'family-name', 'given-name', 'additional-name', 'honorific-prefix', 'honorific-suffix',
      'nickname',
      // address and contact properties
      'adr', 'contact',
      'email',
      'post-office-box', 'extended-address', 'street-address', 'locality', 'region', 'postal-code', 'country-name',
      'bday', 'logo', 'org', 'photo', 'tel'
    ];
    var tagPattern = /<\/?[^>]+(>|$)/g;
    var vcard = new vCard();

    var scanElement = function (el) {
      if (el == undefined) return;

      // className is only usable when it is a non-empty string; anything
      // else (missing, or an object) is skipped instead of throwing.
      if (typeof el.className === 'string' && el.className !== '') {
        // Class names may be separated by any whitespace, not just ' '.
        var classes = el.className.split(/\s+/);
        for (var n = 0; n < classes.length; n++) {
          if (vcardClasses.indexOf(classes[n]) < 0) continue;
          var rawText = el.text == undefined ? '' : String(el.text);
          // Strip tags first, then trim, so padding inside tags is removed.
          var value = rawText.replace(tagPattern, '').trim();
          if (value !== '') vcard.Values[classes[n]] = value;
        }
      }

      // Index-based walk: for...in would also visit non-index properties
      // of the child collection.
      var children = el.childNodes;
      if (children == undefined) return;
      for (var i = 0; i < children.length; i++) scanElement(children[i]);
    };

    scanElement(doc.document.body);

    if (!vcard.isEmpty()) {
      sys.log('vCard = ' + vcard.toString());
    } else {
      sys.log('no vCard found on ' + src.href);
    }
  });
};
<br>
// This source code was highlighted with Source Code Highlighter.
// Queue the profile pages to scan; each finished download raises a 'page'
// event on the shared loader.
[
  'http://www.google.com/profiles/olostan',
  'http://www.flickr.com/people/olostan/'
].forEach(function (profileUrl) {
  loader.load(profileUrl);
});
Source: https://habr.com/ru/post/102840/
All Articles