<figure> <figcaption></figcaption> <a class='image' href=''> <img class='thumb' rel='' src='' /> </a> </figure>
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// Worker is one download worker. Workers are kept in a Pool (a heap) and are
// fed URLs through their own channel.
type Worker struct {
	urls    chan string     // URLs queued for this worker; read by work()
	pending int             // jobs handed to this worker and not yet reported complete
	index   int             // current position in the Pool heap (kept up to date by Pool's Swap/Push/Pop)
	wg      *sync.WaitGroup // wait group used to track downloads that are in flight
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// work is the worker's main loop. It takes URLs from w.urls one at a time,
// downloads each of them and then reports the worker itself on done.
// The loop has no exit condition, so the method never returns.
//
// NOTE(review): wg.Add(1) runs inside the worker goroutine, so it may execute
// concurrently with wg.Wait() in Balancer.balance. The sync.WaitGroup docs
// require Add calls that start from a zero counter to happen before Wait —
// confirm whether Add should be moved to the code that hands out the URL.
func (w *Worker) work(done chan *Worker) {
	for {
		url := <-w.urls // block until a URL is available
		w.wg.Add(1)     // mark one download as in flight
		download(url)   // fetch and store the file
		w.wg.Done()     // the download is no longer in flight
		done <- w       // report the finished job
	}
}
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main

import (
	"fmt"
)

// Animal is satisfied by any type that has a Say method returning a string.
type Animal interface {
	Say() string
}

// Cat carries no state; it always says the same thing.
type Cat struct{}

// Say implements Animal.
func (Cat) Say() string {
	return "!"
}

// Parrot prefixes what it says with its own name.
type Parrot struct {
	name string
}

// Say implements Animal.
func (bird Parrot) Say() string {
	return bird.name + " !"
}

func main() {
	// Both concrete types fit into a slice of the interface type without any
	// explicit declaration that they implement it.
	zoo := []Animal{
		Cat{},
		Parrot{name: ""},
	}
	for i := range zoo {
		fmt.Println(zoo[i].Say())
	}
}
, heap.Interface from container/heap ( http://golang.org/pkg/container/heap/#Interface ) :
// Interface as declared by package container/heap (quoted from its
// documentation): sort.Interface plus Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
// Interface as declared by package sort (quoted from its documentation).
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// Balancer distributes incoming URLs over a pool of workers.
type Balancer struct {
	pool     Pool            // heap of workers, least loaded first
	done     chan *Worker    // workers report each finished job here
	requests chan string     // URLs forwarded by the flow-control goroutine started in init
	flowctrl chan bool       // PMFC: a value sent here lets the flow-control goroutine forward the next URL
	queue    int             // jobs dispatched and not yet completed, over all workers
	wg       *sync.WaitGroup // shared with every worker; used to wait for downloads in flight
}
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var (
	WORKERS    = 5 // number of workers started by Balancer.init
	WORKERSCAP = 5 // buffer size of each worker's urls channel
)
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// Defaults; all of them except WORKERSCAP can be overridden from the command
// line through the flags registered in init.
var (
	WORKERS     = 5     // number of workers
	WORKERSCAP  = 5     // size of each worker's URL queue
	ATATASTREAM = 291   // id of the stream to download
	ATATAPOS    = 0     // offset in the stream to start from
	IMGDIR      = "img" // directory the images are saved to
)

// init registers the command-line flags -w, -s, -p and -d; the current values
// of the variables serve as the flag defaults.
// NOTE(review): the help strings are blank or nearly blank here — their text
// looks lost; confirm and restore from the original.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, " ")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ")
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastebin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// Worker is one download worker managed by the Balancer.
type Worker struct {
	urls    chan string     // URLs assigned to this worker; read by work
	pending int             // number of assigned jobs not yet completed
	index   int             // position of the worker in the Pool heap
	wg      *sync.WaitGroup // wait group marked around each download in work
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main

import "fmt"

// Animal is satisfied by any type that has a Say method.
type Animal interface {
	Say() string
}

// Cat implements Animal without carrying any state.
type Cat struct{}

// Say returns the cat's fixed phrase.
func (Cat) Say() string {
	return "!"
}

// Parrot implements Animal and introduces itself by name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (p Parrot) Say() string {
	return p.name + " !"
}

// main prints what every animal says, calling through the interface.
func main() {
	zoo := []Animal{Cat{}, Parrot{name: ""}}
	for i := range zoo {
		fmt.Println(zoo[i].Say())
	}
}
, container/heap ( http://golang.org/pkg/container/heap/#Interface ) :
// Interface is the heap interface as quoted from the container/heap
// documentation: sort.Interface plus Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
// Interface is the sorting interface as quoted from the sort package
// documentation.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// Balancer distributes incoming URLs over a pool of workers.
type Balancer struct {
	pool     Pool            // heap of workers, least loaded first
	done     chan *Worker    // workers report here after each finished job
	requests chan string     // URLs forwarded by the flow-control goroutine
	flowctrl chan bool       // PMFC: signals that the next URL may be forwarded
	queue    int             // jobs dispatched and not yet completed
	wg       *sync.WaitGroup // shared with the workers; waited on at shutdown
}
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var (
	WORKERS    = 5 // number of workers started by Balancer.init
	WORKERSCAP = 5 // buffer size of each worker's URL channel
)
. , , ( , . , . , , ) :
// balance is the balancer's main loop. It reacts to three events:
// a stop request on quit, a new URL on b.requests, and a worker
// reporting a finished job on b.done.
func (b *Balancer) balance(quit chan bool) {
	lastjobs := false // set once ENDMESSAGE has been received
	for {
		select {
		case <-quit:
			// Stop requested: wait for downloads in progress,
			// then acknowledge on the same channel.
			b.wg.Wait()
			quit <- true
		case url := <-b.requests:
			if url != ENDMESSAGE {
				b.dispatch(url)
			} else {
				// No more URLs will arrive; start winding the pool down.
				lastjobs = true
			}
		case w := <-b.done:
			b.completed(w)
			if lastjobs {
				// NOTE(review): a worker that is already idle when lastjobs is
				// set never reports on done again, so it looks like it would
				// never be removed here; confirm.
				if w.pending == 0 {
					// This worker has nothing left to do: drop it from the pool.
					heap.Remove(&b.pool, w.index)
				}
				if len(b.pool) == 0 {
					// Every worker is finished: signal completion.
					quit <- true
				}
			}
		}
	}
}
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// Worker is a single download goroutine together with the bookkeeping the
// balancer keeps about it.
type Worker struct {
	// urls is the worker's private queue of URLs to download.
	urls chan string

	// pending is the number of jobs handed to this worker that have not
	// been reported as finished yet; the pool is ordered by this value.
	pending int

	// index is the worker's current position inside the heap.
	index int

	// wg is the wait group shared by all workers.
	wg *sync.WaitGroup
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main

import (
	"fmt"
)

// Animal is implemented by every type that has a Say method returning the
// sound it makes. No explicit declaration is needed: having the method is
// enough to satisfy the interface.
type Animal interface {
	Say() string
}

// Cat is an animal without any state.
type Cat struct{}

// Say returns the cat's sound.
func (Cat) Say() string {
	return "!"
}

// Parrot is an animal that knows its own name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its sound.
func (parrot Parrot) Say() string {
	phrase := parrot.name
	phrase += " !"
	return phrase
}

// main puts two different concrete types into one []Animal and lets each of
// them speak through the common interface.
func main() {
	zoo := []Animal{Cat{}, Parrot{name: ""}}
	for i := 0; i < len(zoo); i++ {
		fmt.Println(zoo[i].Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
// Interface is the contract a type must satisfy to be used with the
// container/heap functions (excerpt of the standard library declaration).
// It embeds sort.Interface and adds Push and Pop, which append to and
// remove from the end of the underlying collection.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
// Interface is the ordering contract from package sort (excerpt of the
// standard library declaration) that heap.Interface embeds.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var (
	WORKERS    = 5 // number of worker goroutines started by Balancer.init
	WORKERSCAP = 5 // buffer size of each worker's urls channel
)
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// Program settings with their defaults. All of them except WORKERSCAP can
// be overridden from the command line (see init below).
var (
	WORKERS     = 5     // number of worker goroutines (-w)
	WORKERSCAP  = 5     // size of each worker's URL queue
	ATATASTREAM = 291   // id of the stream to download (-s)
	ATATAPOS    = 0     // offset in the stream to start from (-p)
	IMGDIR      = "img" // directory the images are saved to (-d)
)

// init registers the command-line flags. Each flag writes straight into the
// corresponding setting and uses the current value as its default.
func init() {
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ")
	flag.IntVar(&WORKERS, "w", WORKERS, " ")
}
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastebin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastebin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastebin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastebin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastebin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main

import "fmt"

// Animal is satisfied by any type that has a Say method returning a string.
type Animal interface {
	Say() string
}

// Cat carries no data; it satisfies Animal implicitly.
type Cat struct{}

// Say returns the cat's fixed phrase.
func (c Cat) Say() string {
	const phrase = "!"
	return phrase
}

// Parrot is an animal with a name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (p Parrot) Say() string {
	phrase := p.name
	phrase += " !"
	return phrase
}

// main puts two different concrete types into one []Animal and calls Say
// on each through the interface.
func main() {
	animals := []Animal{
		Cat{},
		Parrot{name: ""},
	}
	for i := 0; i < len(animals); i++ {
		fmt.Println(animals[i].Say())
	}
}
, container/heap ( http://golang.org/pkg/container/heap/#Interface ) :
// Interface is the heap interface as quoted from the container/heap
// documentation: sort.Interface plus Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
// Interface is the sort interface as quoted from the sort package
// documentation; container/heap's Interface embeds it.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// Balancer distributes incoming URLs among the workers in its pool.
type Balancer struct {
	pool     Pool            // heap of workers, least loaded first
	done     chan *Worker    // workers report themselves here after each download
	requests chan string     // URLs forwarded by the flow-control goroutine
	flowctrl chan bool       // PMFC: a value here lets the flow controller forward the next URL
	queue    int             // number of jobs dispatched and not yet completed
	wg       *sync.WaitGroup // wait group shared with every worker
}
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var (
	WORKERS    = 5 // number of worker goroutines started by Balancer.init
	WORKERSCAP = 5 // buffer size of each worker's urls channel
)
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// Default settings. All except WORKERSCAP can be overridden from the command
// line through the flags registered in init below.
var (
	WORKERS     = 5     // number of worker goroutines
	WORKERSCAP  = 5     // buffer size of each worker's URL channel
	ATATASTREAM = 291   // id of the stream to grab
	ATATAPOS    = 0     // starting position within the stream
	IMGDIR      = "img" // directory the downloaded images are written to
)

// init registers the command-line flags: -w (WORKERS), -s (ATATASTREAM),
// -p (ATATAPOS) and -d (IMGDIR). The current values serve as defaults.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, " ")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ")
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastebin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
// Interface is the method set of package sort's Interface: an indexable
// collection that can report its size, compare two elements and swap them.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int

	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool

	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
// WORKERS and WORKERSCAP are the pool sizing defaults.
var WORKERS, WORKERSCAP = 5, 5
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// Program settings with their defaults. WORKERS, ATATASTREAM, ATATAPOS and
// IMGDIR can be overridden from the command line (see init below).
var (
	WORKERS     = 5
	WORKERSCAP  = 5
	ATATASTREAM = 291
	ATATAPOS    = 0
	IMGDIR      = "img"
)

// init registers the command-line flags -w, -s, -p and -d, each defaulting
// to the current value of the variable it sets.
func init() {
	intFlags := []struct {
		target *int
		name   string
		usage  string
	}{
		{&WORKERS, "w", " "},
		{&ATATASTREAM, "s", "id "},
		{&ATATAPOS, "p", " "},
	}
	for _, f := range intFlags {
		flag.IntVar(f.target, f.name, *f.target, f.usage)
	}
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// Balancer distributes download requests among a pool of workers.
type Balancer struct {
	pool     Pool            // heap of workers ordered by pending job count
	done     chan *Worker    // workers report themselves here after finishing a job
	requests chan string     // incoming URLs (and the end-of-stream sentinel)
	flowctrl chan bool       // PMFC: signals the feeding goroutine that it may pass the next request
	queue    int             // number of jobs dispatched and not yet completed
	wg       *sync.WaitGroup // wait group handed to every worker
}
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// Program settings; the values below are the defaults and most of them can be
// overridden from the command line.
var (
	WORKERS     int    = 5     // number of workers
	WORKERSCAP  int    = 5     // capacity of each worker's URL queue
	ATATASTREAM int    = 291   // id of the stream to grab
	ATATAPOS    int    = 0     // offset in the stream to start from
	IMGDIR      string = "img" // directory the images are saved to
)

// init registers the command-line flags (-w, -s, -p, -d) that override the
// defaults above.
func init() {
	flags := flag.CommandLine
	flags.IntVar(&WORKERS, "w", WORKERS, " ")
	flags.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ")
	flags.IntVar(&ATATAPOS, "p", ATATAPOS, " ")
	flags.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// Balancer hands incoming URLs to the least loaded worker and tracks how many
// jobs are outstanding.
type Balancer struct {
	pool     Pool            // workers, kept as a heap ordered by pending jobs
	done     chan *Worker    // workers send themselves here after each finished job
	requests chan string     // URLs forwarded by the flow-control goroutine started in init
	flowctrl chan bool       // PMFC: a value here lets the flow-control goroutine forward the next URL
	queue    int             // jobs dispatched but not yet reported on done
	wg       *sync.WaitGroup // shared with every worker; counts downloads in progress
}
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
// Sizing of the worker pool, read by Balancer.init and by the flow-control
// checks in dispatch and completed.
var (
	WORKERS    = 5 // number of worker goroutines started by Balancer.init
	WORKERSCAP = 5 // buffer size of each worker's urls channel
)
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// Program settings with their defaults. All except WORKERSCAP can be
// overridden from the command line (see init).
var (
	WORKERS     = 5     // number of worker goroutines (-w)
	WORKERSCAP  = 5     // buffer size of each worker's URL channel
	ATATASTREAM = 291   // id of the stream to download (-s)
	ATATAPOS    = 0     // offset in the stream to start from (-p)
	IMGDIR      = "img" // directory the images are saved to (-d)
)

// init registers the command-line flags, using the current value of each
// setting as the flag's default.
func init() {
	intFlags := []struct {
		target *int
		name   string
		usage  string
	}{
		{&WORKERS, "w", " "},
		{&ATATASTREAM, "s", "id "},
		{&ATATAPOS, "p", " "},
	}
	for _, f := range intFlags {
		flag.IntVar(f.target, f.name, *f.target, f.usage)
	}
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// Worker holds the state of one download goroutine as tracked by the
// balancer.
type Worker struct {
	urls    chan string     // URLs assigned to this worker, consumed by work
	pending int             // jobs assigned but not yet reported complete; the heap ordering key
	index   int             // position in the Pool heap; set to -1 when popped from it
	wg      *sync.WaitGroup // shared WaitGroup counting downloads in progress
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// Default settings; the flags registered in init below can override all of
// them except WORKERSCAP.
var (
	WORKERS     = 5     // number of workers
	WORKERSCAP  = 5     // buffer size of each worker's URL channel
	ATATASTREAM = 291   // id of the stream to grab
	ATATAPOS    = 0     // start position within the stream
	IMGDIR      = "img" // directory the images are saved to
)

// init registers the command-line flags: -w (workers), -s (stream id),
// -p (start position) and -d (image directory). The current values of the
// variables serve as the defaults.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, " ")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ")
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastebin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// Worker is the state of one download goroutine as tracked by the balancer.
type Worker struct {
	urls    chan string     // URLs assigned to this worker
	pending int             // jobs assigned and not yet reported as done
	index   int             // position in the Pool heap (-1 once popped)
	wg      *sync.WaitGroup // counts downloads currently in progress
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main

import "fmt"

// Animal is satisfied by any type that has a Say method; no explicit
// declaration of intent is needed on the implementing types.
type Animal interface {
	Say() string
}

// Cat has no state; its sound is fixed.
type Cat struct{}

// Say returns the cat's sound.
func (Cat) Say() string {
	return "!"
}

// Parrot speaks in its own name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (bird Parrot) Say() string {
	return bird.name + " !"
}

// main lets every animal in a mixed slice speak through the interface.
func main() {
	for _, speaker := range []Animal{Cat{}, Parrot{name: ""}} {
		fmt.Println(speaker.Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// Program settings with their defaults. All except WORKERSCAP can be
// overridden from the command line (see init below).
var (
	WORKERS     = 5     // number of worker goroutines (-w)
	WORKERSCAP  = 5     // buffer size of each worker's URL channel
	ATATASTREAM = 291   // id of the stream to grab (-s)
	ATATAPOS    = 0     // offset in the stream to start from (-p)
	IMGDIR      = "img" // directory the files are saved to (-d)
)

// init registers the command-line flags, using the current values of the
// settings above as the flag defaults.
func init() {
	intFlags := []struct {
		target *int
		name   string
		usage  string
	}{
		{&WORKERS, "w", " "},
		{&ATATASTREAM, "s", "id "},
		{&ATATAPOS, "p", " "},
	}
	for _, f := range intFlags {
		flag.IntVar(f.target, f.name, *f.target, f.usage)
	}
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// Worker is one download goroutine together with the bookkeeping the
// balancer keeps about it.
type Worker struct {
	// urls is the worker's queue of URLs to download.
	urls chan string
	// pending is the number of jobs handed to the worker and not yet
	// reported as finished.
	pending int
	// index is the worker's current position in the Pool heap.
	index int
	// wg is the wait group on which the worker marks each download.
	wg *sync.WaitGroup
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main

import "fmt"

// Animal is anything that can say something.
type Animal interface {
	Say() string
}

// Cat is an Animal with a fixed phrase.
type Cat struct{}

// Say returns the cat's phrase.
func (c Cat) Say() string {
	return "!"
}

// Parrot is an Animal that introduces its phrase with its own name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (p Parrot) Say() string {
	return p.name + " !"
}

// main shows that values of unrelated types can be used through the Animal
// interface without declaring that they implement it.
func main() {
	animals := []Animal{
		Cat{},
		Parrot{name: ""},
	}
	for i := 0; i < len(animals); i++ {
		fmt.Println(animals[i].Say())
	}
}
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
// Interface is the contract of container/heap as quoted from its
// documentation: a sortable collection that can also grow and shrink at its end.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
// Interface is the contract of package sort as quoted from its documentation.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )
.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// : var ( WORKERS = 5 // WORKERSCAP = 5 // ATATASTREAM = 291 //id ATATAPOS = 0 // IMGDIR = "img" // ) // : func init() { flag.IntVar(&WORKERS, "w", WORKERS, " ") flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ") flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ") flag.StringVar(&IMGDIR, "d", IMGDIR, " ") }
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastbin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// type Worker struct { urls chan string // pending int // - index int // wg *sync.WaitGroup // }
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main import ("fmt") // type Animal interface { Say() string } type Cat struct{} func (c Cat) Say() string { return "!" } type Parrot struct { name string } func (p Parrot) Say() string { return p.name + " !" } func main() { kitty := Cat{} popka := Parrot{name: ""} animals := []Animal{kitty, popka} for _, animal := range animals { fmt.Println(animal.Say()) } }
, container.Heap ( http://golang.org/pkg/container/heap/#Interface ) :
type Interface interface { sort.Interface Push(x interface{}) // add x as element Len() Pop() interface{} // remove and return element Len() - 1. }
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
type Interface interface { // Len is the number of elements in the collection. Len() int // Less returns whether the element with index i should sort // before the element with index j. Less(i, j int) bool // Swap swaps the elements with indexes i and j. Swap(i, j int) }
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var ( WORKERS = 5 // WORKERSCAP = 5 // )
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// Default settings; init registers flags that override some of them.
var (
	WORKERS     = 5     // number of workers (flag -w)
	WORKERSCAP  = 5     // buffer size of each worker's URL channel
	ATATASTREAM = 291   // id of the stream to grab (flag -s)
	ATATAPOS    = 0     // position in the stream to start from (flag -p)
	IMGDIR      = "img" // directory downloaded files are written to (flag -d)
)

// init registers the command-line flags; main calls flag.Parse.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, " ")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ")
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastebin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
), . . , , , .
, , goquery (http://github.com/opesun/goquery). , :
// atatagrab project main.go package main import ( "fmt" // : "github.com/opesun/goquery" "strconv" ) const ( // : ENDMESSAGE = "TooLateToDieYoung" ) // func generator(out chan string, stream, start int) { for pos := start; ; pos += 20 { // : x, err := goquery.ParseUrl("http://home.atata.com/streams/" + strconv.Itoa(stream) + "?order=date&from=" + strconv.Itoa(pos)) if err == nil { // : for _, url := range x.Find("figure a.image").Attrs("href") { out <- "http://atata.com/" + url } // - .. if len(x.Find("li.last.hide")) > 0 { out <- ENDMESSAGE //.. return } } } }
,
, , ( ). - , , . , , , :
" , - ?"
, . PMFC (Poor Man's Flow Control " ". , ( ) ).
" , , ?"
! - " " " ". ? , :
( ) - . - ( - ?).
. "":
// Worker is one download goroutine's state as tracked by the balancer.
type Worker struct {
	urls    chan string     // URLs assigned to this worker
	pending int             // jobs assigned and not yet reported as completed
	index   int             // position in the Pool heap, maintained by Pool's methods
	wg      *sync.WaitGroup // incremented while a download is in progress
}
: Worker , struct - record .
" ?" - ? Go : . , - , , :
- - - . " " ( WaitGroup ) sync . : WaitGroup , , . , .
, Worker'a.
"" work ( , "", this. ):
// func (w *Worker) work(done chan *Worker) { for { url := <-w.urls // w.wg.Add(1) // download(url) // w.wg.Done() // done <- w // } }
sync :
import ( "sync" )
: done , work() . , ( ).
download() - , . .
Worker Pool , , ( Wikipedia: ( ) ), . " container/heap ", ( - ).
: - , :
package main

import "fmt"

// Animal is satisfied by any type that has a Say method.
type Animal interface {
	Say() string
}

// Cat implements Animal.
type Cat struct{}

// Say returns the cat's phrase.
func (c Cat) Say() string {
	return "!"
}

// Parrot implements Animal and carries a name.
type Parrot struct {
	name string
}

// Say returns the parrot's name followed by its phrase.
func (p Parrot) Say() string {
	return p.name + " !"
}

func main() {
	kitty := Cat{}
	popka := Parrot{name: ""}
	// Both values fit into the slice because both satisfy Animal.
	animals := []Animal{kitty, popka}
	for _, animal := range animals {
		fmt.Println(animal.Say())
	}
}
, container/heap ( http://golang.org/pkg/container/heap/#Interface ) :
// Interface is the set of methods the container/heap functions require:
// everything in sort.Interface plus Push and Pop.
type Interface interface {
	sort.Interface
	Push(x interface{}) // add x as element Len()
	Pop() interface{}   // remove and return element Len() - 1.
}
, Push Pop sort.Interface ( http://golang.org/pkg/sort/#Interface ), :
// Interface is the set of methods a collection needs in order to be sorted.
type Interface interface {
	// Len is the number of elements in the collection.
	Len() int
	// Less returns whether the element with index i should sort
	// before the element with index j.
	Less(i, j int) bool
	// Swap swaps the elements with indexes i and j.
	Swap(i, j int)
}
, :
// "": type Pool []*Worker // - : func (p Pool) Less(i, j int) bool { return p[i].pending < p[j].pending } // : func (p Pool) Len() int { return len(p) } // : func (p Pool) Swap(i, j int) { if i >= 0 && i < len(p) && j >= 0 && j < len(p) { p[i], p[j] = p[j], p[i] p[i].index, p[j].index = i, j } } // : func (p *Pool) Push(x interface{}) { n := len(*p) worker := x.(*Worker) worker.index = n *p = append(*p, worker) } // : func (p *Pool) Pop() interface{} { old := *p n := len(old) item := old[n-1] item.index = -1 *p = old[0 : n-1] return item }
, interface{} - , ( , interface{}). any , . : http://research.swtch.com/interfaces
, heap.* Pool, "" .
"container/heap" :
import ( "container/heap" )
. , , , :
// type Balancer struct { pool Pool // "" done chan *Worker // requests chan string // flowctrl chan bool // PMFC queue int // wg *sync.WaitGroup // }
, init() :
// . func (b *Balancer) init(in chan string) { b.requests = make(chan string) b.flowctrl = make(chan bool) b.done = make(chan *Worker) b.wg = new(sync.WaitGroup) // Flow Control: go func() { for { b.requests <- <-in // <-b.flowctrl // } }() // : heap.Init(&b.pool) for i := 0; i < WORKERS; i++ { w := &Worker{ urls: make(chan string, WORKERSCAP), index: 0, pending: 0, wg: b.wg, } go w.work(b.done) // heap.Push(&b.pool, w) // } }
- "" . , - - !
, : make() , . , - " " ; , .
init() WORKERS WORKERSCAP , :
var (
	WORKERS    = 5 // number of workers started by Balancer.init
	WORKERSCAP = 5 // buffer size of each worker's URL channel
)
. , , ( , . , . , , ) :
// func (b *Balancer) balance(quit chan bool) { lastjobs := false // , for { select { // : case <-quit: // b.wg.Wait() // .. quit <- true //.. case url := <-b.requests: // ( flow controller) if url != ENDMESSAGE { // - ? b.dispatch(url) // , } else { lastjobs = true // } case w := <-b.done: // , b.completed(w) // if lastjobs { if w.pending == 0 { // .. heap.Remove(&b.pool, w.index) // } if len(b.pool) == 0 { // // quit <- true // } } } } }
:
// func (b *Balancer) dispatch(url string) { w := heap.Pop(&b.pool).(*Worker) // .. w.urls <- url //.. . w.pending++ // "".. heap.Push(&b.pool, w) //.. if b.queue++; b.queue < WORKERS*WORKERSCAP { b.flowctrl <- true } }
.. . : , , , .
completed , :
// func (b *Balancer) completed(w *Worker) { w.pending-- heap.Remove(&b.pool, w.index) heap.Push(&b.pool, w) if b.queue--; b.queue == WORKERS*WORKERSCAP-1 { b.flowctrl <- true } }
, , , . : , dispatch , , , - ?
, . - download(), :
// func download(url string) { fileName := IMGDIR + "/" + url[strings.LastIndex(url, "/")+1:] output, err := os.Create(fileName) defer output.Close() response, err := http.Get(url) if err != nil { fmt.Println("Error while downloading", url, "-", err) return } defer response.Body.Close() io.Copy(output, response.Body) }
- - ( ) ( , ):
// Default settings; init registers flags that override some of them.
var (
	WORKERS     = 5     // number of workers (flag -w)
	WORKERSCAP  = 5     // buffer size of each worker's URL channel
	ATATASTREAM = 291   // id of the stream to grab (flag -s)
	ATATAPOS    = 0     // position in the stream to start from (flag -p)
	IMGDIR      = "img" // directory downloaded files are written to (flag -d)
)

// init registers the command-line flags; main calls flag.Parse.
func init() {
	flag.IntVar(&WORKERS, "w", WORKERS, " ")
	flag.IntVar(&ATATASTREAM, "s", ATATASTREAM, "id ")
	flag.IntVar(&ATATAPOS, "p", ATATAPOS, " ")
	flag.StringVar(&IMGDIR, "d", IMGDIR, " ")
}
- . , , os/signal , ctrl-c :
import ( "io" "os" "os/signal" "sync" "flag" "fmt" "container/heap" "strconv" "strings" "github.com/opesun/goquery" "net/http" )
!
func main() { // flag.Parse() // , if err := os.MkdirAll(IMGDIR, 666); err != nil { panic(err) } // links := make(chan string) quit := make(chan bool) b := new(Balancer) b.init(links) // keys keys := make(chan os.Signal, 1) signal.Notify(keys, os.Interrupt) // go b.balance(quit) go generator(links, ATATASTREAM, ATATAPOS) fmt.Println(" ") // : for { select { case <-keys: // : fmt.Println("CTRL-C: ") quit <- true // case <-quit: // fmt.Println(" !") return } } }
.. ( )

.. . , - ! .
time.Sleep , . , "" .
UPD. - ( 3 744 , 3.07Gb ) .
, .
pastebin.com
PS , #291 () "" 10 , ( "-p=200" ) .
UPD2: PPS "" : - Go, , . , , - + . , : , .
,
vimeo.com/49718712 - Rob Pike - 'Concurrency Is Not Parallelism'
sites.google.com/site/gopatterns - Go Language Patterns
golangtutorials.blogspot.ru/2011/05/table-of-contents.html - GoLang Tutorials
talks.golang.org - Go
, !
Source: https://habr.com/ru/post/198150/
All Articles