func getRecentId(topUrl string) int { var url bytes.Buffer url.WriteString(topUrl) url.WriteString("/recent") log.Info("Processing recent torrents page at: %s", url.String()) doc, err := goquery.NewDocument(url.String()) if err != nil { log.Critical("Can't download recent torrents page from TPB: %v", err) return 0 } topTorrent := doc.Find("#searchResult .detName a").First() t, pT := topTorrent.Attr("title") u, pU := topTorrent.Attr("href") if pT && pU { rx, _ := regexp.Compile(`\/torrent\/(\d+)\/.*`) if rx.MatchString(u) { id, err := strconv.Atoi(rx.FindStringSubmatch(u)[1]) if err != nil { log.Critical("Can't retrieve latest torrent id") return 0 } log.Info("The most recent torrent is %s and it's id is %d", t, id) return id } } return 0 }
func (d *Downloader) run() { d.wg.Add(streams) for w := 0; w <= streams; w++ { go d.processPage() } for w := d.initialId; w >= 0; w-- { d.pageId <- w } close(d.pageId) log.Info("Processing complete, waiting for goroutines to finish") d.wg.Wait() d.output.Done() }
func (d *Downloader) processPage() { for id := range d.pageId { var url bytes.Buffer url.WriteString(d.topUrl) url.WriteString("/torrent/") url.WriteString(strconv.Itoa(id)) log.Info("Parsing torrent page at: %s", url.String()) doc, err := goquery.NewDocument(url.String()) if err != nil { log.Warning("Can't download torrent page %s from TPB: %v", url, err) continue } torrentData := doc.Find("#detailsframe") if torrentData.Length() < 1 { log.Warning("Erroneous torrent %d: \"%s\"", id, url.String()) continue } torrent := TorrentEntry{Id: id} torrent.processTitle(torrentData) torrent.processFirstColumn(torrentData) torrent.processSecondColumn(torrentData) torrent.processHash(torrentData) torrent.processMagnet(torrentData) torrent.processInfo(torrentData) d.output.Put(&torrent) log.Info("Processed torrent %d: \"%s\"", id, torrent.Title) } d.wg.Done() }
id , , , , , , , , ,
{ "index": { "analysis": { "analyzer": { "customHTMLSnowball": { "type": "custom", "char_filter": [ "html_strip" ], "tokenizer": "icu_tokenizer", "filter": [ "icu_normalizer", "icu_folding", "lowercase", "stop", "snowball" ] } } } } }
curl -XPUT http://127.0.0.1:9200/tpb -d @tpb-settings.json
{ "properties" : { "Id" : { "type" : "long", "index" : "no" }, "Title" : { "type" : "string", "index" : "analyzed", "analyzer" : "customHTMLSnowball" }, "Size" : { "type" : "long", "index" : "no" }, "Files" : { "type" : "long", "index" : "no" }, "Category" : { "type" : "string", "index" : "not_analyzed" }, "Subcategory" : { "type" : "string", "index" : "not_analyzed" }, "By" : { "type" : "string", "index" : "no" }, "Hash" : { "type" : "string", "index" : "not_analyzed" }, "Uploaded" : { "type" : "date", "index" : "no" }, "Magnet" : { "type" : "string", "index" : "no" }, "Info" : { "type" : "string", "index" : "analyzed", "analyzer" : "customHTMLSnowball" } } }
curl -XPUT http://127.0.0.1:9200/tpb/_mappings/torrent -d @tpb-mapping.json
func (i *Indexer) Run() { for i.scaner.Scan() { var t TorrentEntry err := json.Unmarshal(i.scaner.Bytes(), &t) if err != nil { log.Warning("Failed to parse entry %s", i.scaner.Text()) continue } _, err = i.es.Index().Index(i.index).Type("torrent").BodyJson(t).Do() if err != nil { log.Warning("Failed to index torrent entry %s with id %d", t.Title, t.Id) continue } log.Info("Indexed %s", t.Title) } i.file.Close() }
<form class="form-horizontal"> <div class="form-group"> <label for="queryInput" class="col-sm-2 control-label">Title</label> <div class="col-sm-10"> <input type="text" class="form-control input-sm" id="queryInput" placeholder="Big Buck Bunny" ng-model="query"> </div> </div> <div class="form-group"> <div class="col-sm-offset-2 col-sm-10"> <div class="checkbox"> <label> <input type="checkbox" ng-model="useInfo"> Look in torrent info too. </label> </div> </div> </div> <div class="form-group text-right"> <div class="col-sm-offset-2 col-sm-10"> <button type="submit" class="btn btn-default" ng-click="searchClick()">Search</button> </div> </div> </form>
<div class="panel panel-warning" ng-cloak ng-show="categories.length >0"> <div class="panel-heading"> <h3 class="panel-title">Categories:</h3> </div> <div class="panel-body"> <div ng-repeat="cat in categories | orderBy: 'key'"> <p class="text-justify"> <button class="btn btn-warning wide_button" ng-class="{'active': cat.active}" ng-click="categoryClick(cat)">{{cat.key}} <span class="badge">{{cat.doc_count}}</span></button> </p> </div> </div> </div> <div class="panel panel-warning" ng-cloak ng-show="SubCategories.length >0 && filterCategories.length >0"> <div class="panel-heading"> <h3 class="panel-title">Sub categories:</h3> </div> <div class="panel-body"> <div ng-repeat="cat in SubCategories | orderBy: 'key'"> <p class="text-justify"> <button class="btn btn-success wide_button" ng-class="{'active': cat.active}" ng-click="subCategoryClick(cat)">{{cat.key}} <span class="badge">{{cat.doc_count}}</span></button> </p> </div> </div> </div>
client.search({ index: 'tpb', type: 'torrent', body: ejs.Request().agg(ejs.TermsAggregation('categories').field('Category')) }).then(function (resp) { $scope.categories = resp.aggregations.categories.buckets; $scope.errorCategories = null; }).catch(function (err) { $scope.categories = null; $scope.errorCategories = err; // if the err is a NoConnections error, then the client was not able to // connect to elasticsearch. In that case, create a more detailed error // message if (err instanceof esFactory.errors.NoConnections) { $scope.errorCategories = new Error('Unable to connect to elasticsearch.'); } });
$scope.categoryClick = function (category) { /* Mark button */ category.active = !category.active; /* Reload sub categories list */ $scope.filterCategories = []; $scope.categories.forEach(function (item) { if (item.active) { $scope.filterCategories.push(item.key); } }); if ($scope.filterCategories.length > 0) { $scope.loading = true; client.search({ index: 'tpb', type: 'torrent', body: ejs.Request().agg(ejs.FilterAggregation('SubCategoryFilter').filter(ejs.TermsFilter('Category', $scope.filterCategories)).agg(ejs.TermsAggregation('categories').field('Subcategory').size(50))) }).then(function (resp) { $scope.SubCategories = resp.aggregations.SubCategoryFilter.categories.buckets; $scope.errorSubCategories = null; //Restore selection $scope.SubCategories.forEach(function (item) { if ($scope.selectedSubCategories[item.key]) { item.active = true; } }); } ).catch(function (err) { $scope.SubCategories = null; $scope.errorSubCategories = err; // if the err is a NoConnections error, then the client was not able to // connect to elasticsearch. In that case, create a more detailed error // message if (err instanceof esFactory.errors.NoConnections) { $scope.errorSubCategories = new Error('Unable to connect to elasticsearch.'); } }); } else { $scope.selectedSubCategories = {}; $scope.filterSubCategories = []; } $scope.searchClick(); };
$scope.buildQuery = function () { var match = null; if ($scope.query) { if ($scope.useInfo) { match = ejs.MultiMatchQuery(['Title', 'Info'], $scope.query); } else { match = ejs.MatchQuery('Title', $scope.query); } } else { match = ejs.MatchAllQuery(); } var filter = null; if ($scope.filterSubCategories.length > 0) { filter = ejs.TermsFilter('Subcategory', $scope.filterSubCategories); } if ($scope.filterCategories.length > 0) { var categoriesFilter = ejs.TermsFilter('Category', $scope.filterCategories); if (filter !== null) { filter = ejs.AndFilter([categoriesFilter, filter]); } else { filter = categoriesFilter; } } var request = ejs.Request(); if (filter !== null) { request = request.query(ejs.FilteredQuery(match, filter)); } else { request = request.query(match); } request = request.from($scope.pageNo*10); return request; };
<div class="panel panel-info" ng-repeat="doc in searchResults"> <div class="panel-heading"> <h3 class="panel-title"> <a href="{{doc._source.Magnet}}">{{doc._source.Title}}</a> <!-- build:[src] img/ --> <a href="{{doc._source.Magnet}}"><img class="magnet_icon" src="assets/dist/img/magnet_link.png"></a> <!-- /build --> </h3> </div> <div class="panel-body"> <p class="text-left text-warning"> {{doc._source.Category}} / {{doc._source.Subcategory}}</p> <p class="text-center"><span class="badge">#{{doc._source.Id}}</span> <b>{{doc._source.Title}}</b> </p> <dl class="dl-horizontal"> <dt>Size</dt> <dd>{{doc._source.Size}}</dd> <dt>Files</dt> <dd>{{doc._source.Files}}</dd> <dt>Hash</dt> <dd>{{doc._source.Hash}}</dd> </dl> <div class="well" ng-bind-html="doc._source.Info"></div> <p class="text-right text-muted"> <small>Uploaded at {{doc._source.Uploaded}} by {{doc._source.By}}</small> </p> </div> </div>
Source: https://habr.com/ru/post/275339/
All Articles