diff --git a/collector/scraper/.gitignore b/collector/scraper/.gitignore new file mode 100644 index 0000000..a14702c --- /dev/null +++ b/collector/scraper/.gitignore @@ -0,0 +1,34 @@ +# dependencies (bun install) +node_modules + +# output +out +dist +*.tgz + +# code coverage +coverage +*.lcov + +# logs +logs +_.log +report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# caches +.eslintcache +.cache +*.tsbuildinfo + +# IntelliJ based IDEs +.idea + +# Finder (MacOS) folder config +.DS_Store diff --git a/collector/scraper/README.md b/collector/scraper/README.md new file mode 100644 index 0000000..be8c885 --- /dev/null +++ b/collector/scraper/README.md @@ -0,0 +1,15 @@ +# scraper + +To install dependencies: + +```bash +bun install +``` + +To run: + +```bash +bun run index.ts +``` + +This project was created using `bun init` in bun v1.3.13. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime. diff --git a/collector/scraper/bun.lock b/collector/scraper/bun.lock new file mode 100644 index 0000000..f3432db --- /dev/null +++ b/collector/scraper/bun.lock @@ -0,0 +1,75 @@ +{ + "lockfileVersion": 1, + "configVersion": 1, + "workspaces": { + "": { + "name": "scraper", + "dependencies": { + "axios": "^1.16.0", + }, + "devDependencies": { + "@types/bun": "latest", + }, + "peerDependencies": { + "typescript": "^5", + }, + }, + }, + "packages": { + "@types/bun": ["@types/bun@1.3.13", "", { "dependencies": { "bun-types": "1.3.13" } }, "sha512-9fqXWk5YIHGGnUau9TEi+qdlTYDAnOj+xLCmSTwXfAIqXr2x4tytJb43E9uCvt09zJURKXwAtkoH4nLQfzeTXw=="], + + "@types/node": ["@types/node@25.6.2", "", { "dependencies": { "undici-types": "~7.19.0" } }, "sha512-sokuT28dxf9JT5Kady1fsXOvI4HVpjZa95NKT5y9PNTIrs2AsobR4GFAA90ZG8M+nxVRLysCXsVj6eGC7Vbrlw=="], + + "asynckit": ["asynckit@0.4.0", "", {}, "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="], + + "axios": ["axios@1.16.0", "", { "dependencies": { "follow-redirects": "^1.16.0", "form-data": "^4.0.5", "proxy-from-env": "^2.1.0" } }, "sha512-6hp5CwvTPlN2A31g5dxnwAX0orzM7pmCRDLnZSX772mv8WDqICwFjowHuPs04Mc8deIld1+ejhtaMn5vp6b+1w=="], + + "bun-types": ["bun-types@1.3.13", "", { "dependencies": { "@types/node": "*" } }, "sha512-QXKeHLlOLqQX9LgYaHJfzdBaV21T63HhFJnvuRCcjZiaUDpbs5ED1MgxbMra71CsryN/1dAoXuJJJwIv/2drVA=="], + + "call-bind-apply-helpers": ["call-bind-apply-helpers@1.0.2", "", { "dependencies": { "es-errors": "^1.3.0", "function-bind": "^1.1.2" } }, "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ=="], + + "combined-stream": ["combined-stream@1.0.8", "", { "dependencies": { "delayed-stream": "~1.0.0" } }, "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg=="], + + "delayed-stream": ["delayed-stream@1.0.0", "", {}, "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ=="], + + "dunder-proto": ["dunder-proto@1.0.1", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.1", "es-errors": "^1.3.0", "gopd": "^1.2.0" } }, "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A=="], + + "es-define-property": ["es-define-property@1.0.1", "", {}, "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g=="], + + "es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="], + + "es-object-atoms": ["es-object-atoms@1.1.1", "", { "dependencies": { "es-errors": "^1.3.0" } }, "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA=="], + + "es-set-tostringtag": ["es-set-tostringtag@2.1.0", "", { "dependencies": { "es-errors": "^1.3.0", "get-intrinsic": "^1.2.6", "has-tostringtag": "^1.0.2", "hasown": "^2.0.2" } }, "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA=="], + + "follow-redirects": ["follow-redirects@1.16.0", "", {}, "sha512-y5rN/uOsadFT/JfYwhxRS5R7Qce+g3zG97+JrtFZlC9klX/W5hD7iiLzScI4nZqUS7DNUdhPgw4xI8W2LuXlUw=="], + + "form-data": ["form-data@4.0.5", "", { "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", "hasown": "^2.0.2", "mime-types": "^2.1.12" } }, "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w=="], + + "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], + + "get-intrinsic": ["get-intrinsic@1.3.0", "", { "dependencies": { "call-bind-apply-helpers": "^1.0.2", "es-define-property": "^1.0.1", "es-errors": "^1.3.0", "es-object-atoms": "^1.1.1", "function-bind": "^1.1.2", "get-proto": "^1.0.1", "gopd": "^1.2.0", "has-symbols": "^1.1.0", "hasown": "^2.0.2", "math-intrinsics": "^1.1.0" } }, "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ=="], + + "get-proto": ["get-proto@1.0.1", "", { "dependencies": { "dunder-proto": "^1.0.1", "es-object-atoms": "^1.0.0" } }, "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g=="], + + "gopd": ["gopd@1.2.0", "", {}, "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg=="], + + "has-symbols": ["has-symbols@1.1.0", "", {}, "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ=="], + + "has-tostringtag": ["has-tostringtag@1.0.2", "", { "dependencies": { "has-symbols": "^1.0.3" } }, "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw=="], + + "hasown": ["hasown@2.0.3", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-ej4AhfhfL2Q2zpMmLo7U1Uv9+PyhIZpgQLGT1F9miIGmiCJIoCgSmczFdrc97mWT4kVY72KA+WnnhJ5pghSvSg=="], + + "math-intrinsics": ["math-intrinsics@1.1.0", "", {}, "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g=="], + + "mime-db": ["mime-db@1.52.0", "", {}, "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg=="], + + "mime-types": ["mime-types@2.1.35", "", { "dependencies": { "mime-db": "1.52.0" } }, "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw=="], + + "proxy-from-env": ["proxy-from-env@2.1.0", "", {}, "sha512-cJ+oHTW1VAEa8cJslgmUZrc+sjRKgAKl3Zyse6+PV38hZe/V6Z14TbCuXcan9F9ghlz4QrFr2c92TNF82UkYHA=="], + + "typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="], + + "undici-types": ["undici-types@7.19.2", "", {}, "sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg=="], + } +} diff --git a/collector/scraper/config.json b/collector/scraper/config.json new file mode 100644 index 0000000..b8c7427 --- /dev/null +++ b/collector/scraper/config.json @@ -0,0 +1,9 @@ +{ + "stores": [ + { + "name": "kroger", + "base_url": "https://shockrah.xyz", + "roducts": [] + } + ] +} diff --git a/collector/scraper/package.json b/collector/scraper/package.json new file mode 100644 index 0000000..065d460 --- /dev/null +++ b/collector/scraper/package.json @@ -0,0 +1,15 @@ +{ + "name": "scraper", + "module": "index.ts", + "type": "module", + "private": true, + "devDependencies": { + "@types/bun": "latest" + }, + "peerDependencies": { + "typescript": "^5" + }, + "dependencies": { + "axios": "^1.16.0" + } +} diff --git a/collector/scraper/src/data/config.ts b/collector/scraper/src/data/config.ts new file mode 100644 index 0000000..00a65b3 --- /dev/null +++ b/collector/scraper/src/data/config.ts @@ -0,0 +1,59 @@ +import fsPromises from 'fs/promises'; + +interface Product { + name: string, + id: string +} + +export interface Store { + name: string, + base_url: string, + products: Array +} + + + +/* + * Here we control what stores and what products we are going to query + * from the configuration since the collector is basically just a oneshot + * tool + */ +export interface Config { + stores: Array +} + +/* + * Handling the validation of our configuration + * + */ +function valid(data: any) : data is Config { + if('stores' in data) { + if(!(data['stores'] instanceof Array)) { + console.error('stores is not an array!') + return false + } + for(const store in data['stores']) { + // TODO: i don't like this check but it's fine for now :thinking: + const keys = Object.keys(store); + return keys.includes('name') && keys.includes('base_url') && keys.includes('products') + } + } + console.error('Invalid configuration') + return false +} + +/* + * Handling the loading and generation of the Configuration that we'll use + * during the run of data fetching. + */ +export async function LoadConfig(path: string) : Promise { + try { + const file = await fsPromises.readFile(path); + const config: Config = JSON.parse(file.toString()) + return valid(config) ? config : null; + + } catch { + return null; + } +} + diff --git a/collector/scraper/src/data/data.ts b/collector/scraper/src/data/data.ts new file mode 100644 index 0000000..1eadf02 --- /dev/null +++ b/collector/scraper/src/data/data.ts @@ -0,0 +1,32 @@ +export interface Store { + business_id: string, + businesses: string +} + +// why: because type completion is going to matter later on +export enum ProductType { + service, + good +} +export enum ProductUnitType { + item, + weight, + hour, + volume, +} + +export interface Product { + id: string, + + business_id: string, // references the business_id in a Store entry + + product_name: string, + product_type: ProductType, + + price: number, + currency: string, // will likely just be USD for now + + unit_type: UnitType +} + + diff --git a/collector/scraper/src/index.ts b/collector/scraper/src/index.ts new file mode 100644 index 0000000..e0096f4 --- /dev/null +++ b/collector/scraper/src/index.ts @@ -0,0 +1,15 @@ +import { get_price } from './kroger/lib.ts'; +import { LoadConfig } from './data/config.ts'; + + +const config = LoadConfig('config.json') + +if(config === null) { + console.error('config failed to load with path config.json') +} else { + console.log('config loaded') +} + + +// Getting prices from kroger first +await get_price(null) diff --git a/collector/scraper/src/kroger/lib.ts b/collector/scraper/src/kroger/lib.ts new file mode 100644 index 0000000..375618f --- /dev/null +++ b/collector/scraper/src/kroger/lib.ts @@ -0,0 +1,15 @@ +/* + * This module's purpose is to expose the API calls from Kroger + * + * lib.rs - exposes high level API calls with details abstracted to sub-files + * + */ + +import { Store as StoreConfig } from '../data/config.ts'; + +async function get_session_token(api_key: string) : string { +} + +export async function get_price(config: StoreConfig|null) { + console.log('pretending to get prices'); +} diff --git a/collector/scraper/tsconfig.json b/collector/scraper/tsconfig.json new file mode 100644 index 0000000..b2e7497 --- /dev/null +++ b/collector/scraper/tsconfig.json @@ -0,0 +1,30 @@ +{ + "compilerOptions": { + // Environment setup & latest features + "lib": ["ESNext"], + "target": "ESNext", + "module": "Preserve", + "moduleDetection": "force", + "jsx": "react-jsx", + "allowJs": true, + "types": ["bun"], + + // Bundler mode + "moduleResolution": "bundler", + "allowImportingTsExtensions": true, + "verbatimModuleSyntax": true, + "noEmit": true, + + // Best practices + "strict": true, + "skipLibCheck": true, + "noFallthroughCasesInSwitch": true, + "noUncheckedIndexedAccess": true, + "noImplicitOverride": true, + + // Some stricter flags (disabled by default) + "noUnusedLocals": false, + "noUnusedParameters": false, + "noPropertyAccessFromIndexSignature": false + } +}