Really basic scraper using counted divs and prefixes

This commit is contained in:
2023-09-27 15:07:06 -05:00
parent 5d3c643d97
commit b1a98f59e2
5 changed files with 3062 additions and 0 deletions

11
Tests/curlCommands.txt Normal file
View File

@@ -0,0 +1,11 @@
curl -X GET -H "Content-Type: application/json" -d '{"instanceCount": 14,
"idName": ".ux-textspans",
"prefix":"US ",
"url":"https://www.ebay.com/itm/355014752155?_trkparms=amclksrc%3DITM%26aid%3D1110006%26algo%3DHOMESPLICE.SIM%26ao%3D1%26asc%3D20201210111314%26meid%3Db9d7bfc448e846fd88d6af6196122543%26pid%3D101195%26rk%3D5%26rkt%3D12%26sd%3D125187190152%26itm%3D355014752155%26pmt%3D1%26noa%3D0%26pg%3D4429486%26algv%3DSimplAMLv11WebTrimmedV3MskuWithLambda85KnnRecallV1V2V4ItemNrtInQueryAndCassiniVisualRankerAndBertRecallWithVMEV3CPCAuto%26brand%3DMiller&_trksid=p4429486.c101195.m1851&amdata=cksum%3A355014752155b9d7bfc448e846fd88d6af6196122543%7Cenc%3AAQAIAAABUObhgc4Nk8%252BdtAwOww4FKLaj%252FQ5qqgDlQCuqZA43WcPFUWDERCUugbbOk7XQv0JXlBfqCg2xKF3WcPghxGMFw2oSlXvfExEaMYr7I7LmrHcP6czY1wIMt0ORyKiCWt95xldincyyBx3g%252BNDW%252B%252FhWUgTaBhK6xAm%252BJIbCOMehu%252Bdw7Cl7%252B5IYh7smXk3oe11K772Gk2jRH3EKtZgP6B%252FlgnbOdlzXvdfx9nm%252BOFv14nym91rSP%252Fp0wbIOb9ayjgcJ%252BFrPBZFmP28lX44UnMF2tb1luPAriUk40GUO3lqhKbBiRBHaRdiQQMcQYqGH0PMIMw9ARpndx%252BhzDgl11zXK577uYvKJmCTZG%252BJsYG0kBH8jTJWhtdTz3Z7HEvndOTAx0XNofblr0%252FSfGh1VnTJs5jXxD1%252Fn86pkxTf7HyqpXKsaDdR64EbDneXYdEMMx2UixQ%253D%253D%7Campid%3APL_CLK%7Cclp%3A4429486&epid=722188521"}' 'localhost:8001/getPrice'
curl -X GET -H "Content-Type: application/json" -d '{"instanceCount": 14,
"idName": ".ux-textspans",
"prefix":"US ",
"url":"https://www.ebay.com/itm/125187190152?_trkparms=amclksrc%3DITM%26aid%3D777008%26algo%3DPERSONAL.TOPIC%26ao%3D1%26asc%3D20230823115209%26meid%3D547ade272f0245a3a38d3f775c940b40%26pid%3D101800%26rk%3D1%26rkt%3D1%26sd%3D394822890601%26itm%3D125187190152%26pmt%3D0%26noa%3D1%26pg%3D4375194%26algv%3DRecentlyViewedItemsV2SignedOut%26brand%3DMiller&_trksid=p4375194.c101800.m5481&_trkparms=parentrq%3Ad7d5ca4718a0ab4c13690428fffff6d6%7Cpageci%3A22c4128a-5d61-11ee-9d02-ee1c8ae0bfdf%7Ciid%3A1%7Cvlpname%3Avlp_homepage"}' 'localhost:8001/getPrice'

2965
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

13
package.json Normal file
View File

@@ -0,0 +1,13 @@
{
"dependencies": {
"body-parser": "^1.20.1",
"cheerio": "^1.0.0-rc.12",
"express": "^4.18.2",
"moment": "^2.29.4",
"mongo-sanitize": "^1.1.0",
"mongodb": "^6.1.0",
"node-cron": "^3.0.2",
"pug": "^3.0.2",
"request": "^2.88.2"
}
}

72
server.js Normal file
View File

@@ -0,0 +1,72 @@
// ---- Ports ----
const portServer = 8001; // port handed to httpServer.listen() further down
const securePort = 8020; // companion port value; not referenced by the code visible here

// ---- Modules ----
const express = require("express");
const fs = require("fs");
const request = require("request");
const bodyParser = require("body-parser");
const moment = require("moment");
/* var fetch = require("node-fetch"); */
const cron = require("node-cron");
const http = require("http");
const https = require("https");
const MongoClient = require("mongodb").MongoClient;
const sanitize = require("mongo-sanitize");
const cheerio = require("cheerio"); // jQuery-style selector API used on fetched HTML
const pug = require("pug");
const { exec } = require("child_process");
const path = require("path");

// ---- Express application ----
const app = express();

// Body parsers are registered first so every route can read req.body.
app.use(bodyParser.urlencoded({ extended: false }));
app.use(bodyParser.json());

// Templates are rendered with pug.
app.set("view engine", "pug");

// Static assets are served from directories next to this file.
const imagesDir = path.join(__dirname, "/images");
const staticDir = path.join(__dirname, "/static");
app.use("/images", express.static(imagesDir));
app.use("/static", express.static(staticDir));
// Root route: answers every GET / with a fixed greeting.
app.get("/", (req, res) => {
  res.send("Hello World");
});
/**
 * Tell whether `value` is a string holding an absolute http(s) URL.
 *
 * @param {*} value - candidate taken from the request body
 * @returns {boolean} true only for strings that parse as http: or https: URLs
 */
function isHttpUrl(value) {
  if (typeof value !== "string") {
    return false;
  }
  try {
    const protocol = new URL(value).protocol;
    return protocol === "http:" || protocol === "https:";
  } catch (e) {
    // new URL() throws on anything it cannot parse; that simply means "not a URL".
    return false;
  }
}

/**
 * Return the part of `text` that follows the last occurrence of `prefix`.
 *
 * @param {string} text - text content of the matched element
 * @param {*} prefix - marker to cut after; a missing or empty prefix leaves the text untouched
 * @returns {string} the trailing segment, or `text` itself when the prefix is absent
 */
function textAfterLastPrefix(text, prefix) {
  if (prefix == null || prefix === "") {
    return text;
  }
  const segments = text.split(String(prefix));
  return segments[segments.length - 1];
}

/**
 * GET /getPrice - fetch a page and report the text of one element on it.
 *
 * Reads these fields from the parsed request body:
 *   - url:           absolute http(s) address of the page to fetch
 *   - idName:        selector handed to cheerio (for example ".ux-textspans")
 *   - instanceCount: zero-based index of the matching element to read
 *   - prefix:        optional; only the text after its last occurrence is returned
 *
 * Responses:
 *   - 200 "Answer: <text>" when the element exists
 *   - 400 when a body field is missing or malformed
 *   - 404 when fewer than instanceCount + 1 elements match the selector
 *   - 502 when the page could not be fetched
 *   - 500 when parsing or selecting throws
 *
 * NOTE(security): `url` is caller-supplied, so this route fetches any http(s)
 * address the server can reach. Restrict the allowed hosts before exposing it.
 */
app.get("/getPrice", function (req, res) {
  const body = req.body || {};
  const url = body["url"];
  const prefix = body["prefix"];
  const idName = body["idName"];
  const instanceCount = Number.parseInt(body["instanceCount"], 10);

  // Reject bad input up front instead of letting it surface as a throw later.
  if (!isHttpUrl(url)) {
    res.status(400).send('"url" must be an absolute http(s) URL');
    return;
  }
  if (typeof idName !== "string" || idName === "") {
    res.status(400).send('"idName" must be a non-empty selector string');
    return;
  }
  if (!Number.isInteger(instanceCount) || instanceCount < 0) {
    res.status(400).send('"instanceCount" must be a non-negative integer');
    return;
  }

  try {
    request(url, (err, resp, html) => {
      // A failed fetch leaves `html` undefined, so stop before parsing.
      if (err) {
        console.error(err);
        res.status(502).send("Failed to fetch the requested URL");
        return;
      }

      // This callback runs after the route function has returned, so the
      // outer try/catch cannot see its exceptions; it needs its own guard.
      try {
        const $ = cheerio.load(html);
        const match = $(idName).eq(instanceCount);
        if (match.length === 0) {
          res
            .status(404)
            .send(`No element at index ${instanceCount} matches "${idName}"`);
          return;
        }

        const finalAnswer = textAfterLastPrefix(match.text(), prefix);
        console.log(finalAnswer);
        res.send(`Answer: ${finalAnswer}`);
      } catch (e) {
        console.error(e);
        if (!res.headersSent) {
          res.status(500).send("Failed to extract a value from the page");
        }
      }
    });
  } catch (e) {
    // Covers a synchronous throw while starting the outgoing request.
    console.error(e);
    if (!res.headersSent) {
      res.status(500).send("Failed to start the page request");
    }
  }
});
// Wrap the Express app in a plain HTTP server and start it on portServer.
const httpServer = http.createServer(app);
//const httpsServer = https.createServer(credentials, app);
const server = httpServer.listen(portServer, () => {
  // Log the address and port the server actually bound to.
  const { address: host, port } = server.address();
  console.log("Server is listening at http://%s:%s", host, port);
});

1
testData Normal file
View File

@@ -0,0 +1 @@
Hello, world!