Really basic scraper using counted divs and prefixes
This commit is contained in:
11
Tests/curlCommands.txt
Normal file
11
Tests/curlCommands.txt
Normal file
@@ -0,0 +1,11 @@
|
||||
curl -X GET -H "Content-Type: application/json" -d '{"instanceCount": 14,
|
||||
"idName": ".ux-textspans",
|
||||
"prefix":"US ",
|
||||
"url":"https://www.ebay.com/itm/355014752155?_trkparms=amclksrc%3DITM%26aid%3D1110006%26algo%3DHOMESPLICE.SIM%26ao%3D1%26asc%3D20201210111314%26meid%3Db9d7bfc448e846fd88d6af6196122543%26pid%3D101195%26rk%3D5%26rkt%3D12%26sd%3D125187190152%26itm%3D355014752155%26pmt%3D1%26noa%3D0%26pg%3D4429486%26algv%3DSimplAMLv11WebTrimmedV3MskuWithLambda85KnnRecallV1V2V4ItemNrtInQueryAndCassiniVisualRankerAndBertRecallWithVMEV3CPCAuto%26brand%3DMiller&_trksid=p4429486.c101195.m1851&amdata=cksum%3A355014752155b9d7bfc448e846fd88d6af6196122543%7Cenc%3AAQAIAAABUObhgc4Nk8%252BdtAwOww4FKLaj%252FQ5qqgDlQCuqZA43WcPFUWDERCUugbbOk7XQv0JXlBfqCg2xKF3WcPghxGMFw2oSlXvfExEaMYr7I7LmrHcP6czY1wIMt0ORyKiCWt95xldincyyBx3g%252BNDW%252B%252FhWUgTaBhK6xAm%252BJIbCOMehu%252Bdw7Cl7%252B5IYh7smXk3oe11K772Gk2jRH3EKtZgP6B%252FlgnbOdlzXvdfx9nm%252BOFv14nym91rSP%252Fp0wbIOb9ayjgcJ%252BFrPBZFmP28lX44UnMF2tb1luPAriUk40GUO3lqhKbBiRBHaRdiQQMcQYqGH0PMIMw9ARpndx%252BhzDgl11zXK577uYvKJmCTZG%252BJsYG0kBH8jTJWhtdTz3Z7HEvndOTAx0XNofblr0%252FSfGh1VnTJs5jXxD1%252Fn86pkxTf7HyqpXKsaDdR64EbDneXYdEMMx2UixQ%253D%253D%7Campid%3APL_CLK%7Cclp%3A4429486&epid=722188521"}' 'localhost:8001/getPrice'
|
||||
|
||||
curl -X GET -H "Content-Type: application/json" -d '{"instanceCount": 14,
|
||||
"idName": ".ux-textspans",
|
||||
"prefix":"US ",
|
||||
"url":"https://www.ebay.com/itm/125187190152?_trkparms=amclksrc%3DITM%26aid%3D777008%26algo%3DPERSONAL.TOPIC%26ao%3D1%26asc%3D20230823115209%26meid%3D547ade272f0245a3a38d3f775c940b40%26pid%3D101800%26rk%3D1%26rkt%3D1%26sd%3D394822890601%26itm%3D125187190152%26pmt%3D0%26noa%3D1%26pg%3D4375194%26algv%3DRecentlyViewedItemsV2SignedOut%26brand%3DMiller&_trksid=p4375194.c101800.m5481&_trkparms=parentrq%3Ad7d5ca4718a0ab4c13690428fffff6d6%7Cpageci%3A22c4128a-5d61-11ee-9d02-ee1c8ae0bfdf%7Ciid%3A1%7Cvlpname%3Avlp_homepage"}' 'localhost:8001/getPrice'
|
||||
|
||||
|
||||
2965
package-lock.json
generated
Normal file
2965
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
13
package.json
Normal file
13
package.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"dependencies": {
|
||||
"body-parser": "^1.20.1",
|
||||
"cheerio": "^1.0.0-rc.12",
|
||||
"express": "^4.18.2",
|
||||
"moment": "^2.29.4",
|
||||
"mongo-sanitize": "^1.1.0",
|
||||
"mongodb": "^6.1.0",
|
||||
"node-cron": "^3.0.2",
|
||||
"pug": "^3.0.2",
|
||||
"request": "^2.88.2"
|
||||
}
|
||||
}
|
||||
72
server.js
Normal file
72
server.js
Normal file
@@ -0,0 +1,72 @@
|
||||
var portServer = 8001;
|
||||
var securePort = 8020;
|
||||
var express = require('express');
|
||||
var app = express();
|
||||
var fs = require('fs');
|
||||
var request = require('request');
|
||||
var bodyParser = require("body-parser");
|
||||
var moment = require('moment');
|
||||
/*var fetch = require("node-fetch");*/
|
||||
app.use(bodyParser.urlencoded({ extended: false }));
|
||||
app.use(bodyParser.json());
|
||||
var cron = require('node-cron');
|
||||
var http = require('http');
|
||||
var https = require('https');
|
||||
var MongoClient = require('mongodb').MongoClient;
|
||||
var sanitize = require('mongo-sanitize');
|
||||
var cheerio = require("cheerio"); //jQuery Substitute
|
||||
|
||||
|
||||
const pug = require('pug');
|
||||
const { exec } = require('child_process');
|
||||
const path = require('path');
|
||||
|
||||
// Render views with the Pug template engine.
app.set('view engine', "pug");

// Expose the on-disk images/ and static/ folders under URL prefixes of the
// same name (mounted in this order: /images first, then /static).
for (const mountPoint of ["/images", "/static"]) {
    app.use(mountPoint, express.static(path.join(__dirname, mountPoint)));
}
|
||||
|
||||
|
||||
// Root route: every GET / is answered with a fixed greeting.
app.get('/', (_req, res) => {
    res.send('Hello World');
});
|
||||
|
||||
/**
 * GET /getPrice — downloads a remote page and extracts one piece of text from it.
 *
 * Reads these fields from the parsed request body:
 *   - url:           page to download (must be an absolute http/https URL)
 *   - idName:        CSS selector handed to cheerio (e.g. ".ux-textspans")
 *   - instanceCount: zero-based index of the matching element to read
 *   - prefix:        separator to split the element text on; the part after
 *                    its last occurrence is returned
 *
 * Responds with `Answer: <text>` on success. Invalid input yields 400, a
 * failed download 502, and an unexpected parsing error 500.
 *
 * NOTE(review): the URL is caller-supplied and fetched server-side, which is
 * an SSRF surface. Only the scheme is checked here; consider a host
 * allow-list before exposing this endpoint beyond local testing.
 */
app.get("/getPrice", function (req, res) {
    // NOTE(review): "9" is what the original code answered when no element
    // matched. It is kept for backward compatibility but looks like a
    // leftover placeholder — confirm whether clients rely on it.
    const fallbackAnswer = "9";

    const body = req.body || {};
    const url = body["url"];
    const prefix = body["prefix"];
    const idName = body["idName"];
    const instanceCount = Number.parseInt(body["instanceCount"], 10);

    // Validate up front so bad input becomes a 400 instead of a thrown
    // exception or a silently wrong answer.
    let target = null;
    if (typeof url === "string") {
        try {
            target = new URL(url);
        } catch (e) {
            target = null;
        }
    }
    if (target === null || (target.protocol !== "http:" && target.protocol !== "https:")) {
        res.status(400).send("Invalid or missing url");
        return;
    }
    if (typeof idName !== "string" || idName.length === 0) {
        res.status(400).send("Invalid or missing idName");
        return;
    }
    if (!Number.isInteger(instanceCount) || instanceCount < 0) {
        res.status(400).send("Invalid or missing instanceCount");
        return;
    }

    request(url, (err, resp, html) => {
        if (err) {
            // Network/DNS/TLS failure: there is no HTML to parse.
            console.error(err);
            res.status(502).send("Failed to fetch url");
            return;
        }

        // This callback runs asynchronously, so it needs its own try/catch:
        // an exception here (e.g. an invalid selector) would otherwise be
        // uncaught and leave the client without a response.
        try {
            const $ = cheerio.load(html);
            const match = $(idName).eq(instanceCount);
            const finalAnswer = match.length > 0
                ? match.text().split(prefix).pop()
                : fallbackAnswer;

            console.log(finalAnswer);
            res.send(`Answer: ${finalAnswer}`);
        } catch (e) {
            console.error(e);
            if (!res.headersSent) {
                res.status(500).send("Failed to parse page");
            }
        }
    });
});
|
||||
|
||||
|
||||
// Plain HTTP server wrapping the Express app (the HTTPS variant is still disabled).
const httpServer = http.createServer(app);
//const httpsServer = https.createServer(credentials, app);

// Start listening on portServer and log the bound address once the socket is ready.
const server = httpServer.listen(portServer, () => {
    const { address: host, port } = server.address();

    console.log("Server is listening at http://%s:%s", host, port);
});
|
||||
|
||||
Reference in New Issue
Block a user