219 lines
6.8 KiB
Python
219 lines
6.8 KiB
Python
from argparse import ArgumentParser,ArgumentTypeError
|
|
import csv
|
|
import asyncio
|
|
from pyppeteer import launch
|
|
from pyquery import PyQuery as pq
|
|
import json
|
|
import math
|
|
from os import linesep as EOL
|
|
|
|
# User-Agent string applied to every browser page before navigation.
ua = (
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:130.0) "
    "Gecko/20100101 Firefox/130.0"
)

# Source identifiers accepted by the --source command-line option.
sources = ["artcraft", "photographers_formulary", "bandh"]

# Input CSV listing what to scrape (read by main()).
csvFile = "./sources.csv"

# Output CSV that scraped rows are appended to.
csvOutput = "./supply.csv"
|
|
|
|
def is_source(source):
    """Validate a source name for use as an argparse ``type=`` callback.

    Returns ``source`` unchanged when it appears in the module-level
    ``sources`` list.

    Raises:
        ArgumentTypeError: if ``source`` is not in ``sources``.
    """
    if source not in sources:
        raise ArgumentTypeError('Not in list')
    return source
|
|
|
|
async def artcraft(url, chemical):
    """Scrape one Artcraft product page and append a CSV row per variant.

    Loads ``url`` in a headless browser, extracts the variant list with
    ``parse_artcraft_product``, then visits ``<url>?variant=<id>`` for each
    variant to read its price with ``parse_artcraft_price``. Each variant
    dict gains ``url`` and ``price`` keys and is written out through
    ``artcraft_line``.

    Args:
        url: Product page URL.
        chemical: Chemical name written in the first CSV column.

    The browser is closed in a ``finally`` block so a navigation or parsing
    error does not leave the browser process running.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.setUserAgent(ua)
        print(url)
        await page.goto(url)
        # Short pause before reading the DOM (presumably to let scripts
        # finish rendering -- confirm).
        await asyncio.sleep(1)
        data = parse_artcraft_product(await page.content())

        # Longer pause before starting the per-variant requests.
        await asyncio.sleep(5)
        for variant in data:
            variant_url = f'{url}?variant={variant["id"]}'
            print(variant_url)
            await page.goto(variant_url)
            variant['url'] = variant_url
            variant['price'] = parse_artcraft_price(await page.content())
    finally:
        await browser.close()

    # Rows are written only after every variant has been scraped.
    for v in data:
        artcraft_line(chemical, v)
|
|
|
|
async def artcraft_test(url, chemical):
    """Run the Artcraft product parser against a saved page.

    Prints ``url``, reads ``./test.html`` and prints whatever
    ``parse_artcraft_product`` extracts from it. ``chemical`` is accepted
    for signature parity with ``artcraft`` but is not used.
    """
    print(url)
    with open('./test.html', 'r') as saved_page:
        markup = saved_page.read()
    print(parse_artcraft_product(markup))
|
|
|
|
def artcraft_line(chemical, v):
    """Append one Artcraft variant to the output CSV and echo it to stdout.

    Column order is ``chemical,url,grams,milliliters,price``; the
    milliliters column is always left empty here.

    Args:
        chemical: Value for the first column.
        v: Variant dict providing ``url``, ``weight`` and ``price``.

    NOTE(review): fields are interpolated without CSV quoting, so a value
    containing a comma will shift the columns.
    """
    line = f'{chemical},{v["url"]},{v["weight"]},,{v["price"]}{EOL}'
    print(line)
    # newline='' disables newline translation so EOL (os.linesep) is written
    # verbatim. In default text mode the '\n' inside a Windows '\r\n' would be
    # translated a second time, producing '\r\r\n' line endings.
    with open(csvOutput, 'a', newline='') as file:
        file.write(line)
|
|
|
|
def parse_artcraft_id(text):
    """Extract an id from a dash/underscore delimited string.

    Takes the third ``-``-separated segment of ``text`` and returns the part
    of it that precedes the first ``_``.

    Raises:
        IndexError: if ``text`` has fewer than three ``-``-separated segments.
    """
    third_segment = text.split('-')[2]
    ident, _, _ = third_segment.partition('_')
    return ident
|
|
|
|
def parse_artcraft_weight(text):
    """Convert an Artcraft variant title into a weight in grams.

    The title is lower-cased and a ``-1/2`` suffix on the quantity is
    rewritten to ``.5`` (``"1-1/2 Pounds"`` becomes ``"1.5 pounds"``). The
    first whitespace-separated token is then read as the quantity.

    Args:
        text: Variant title, e.g. ``"100 Grams"`` or ``"1 Pound"``.

    Returns:
        The weight in grams as a float, or ``None`` when the title mentions
        none of the recognised units (pound, kilogram, gram).

    Raises:
        ValueError: if a unit is recognised but the first token is not a
            number.
    """
    text = text.lower().replace('-1/2', '.5')

    # Order matters: 'pound' keeps its original priority, and 'kilogram' must
    # be tested before 'gram' because the substring 'gram' also matches
    # "kilogram" (which previously made "1 Kilogram" come out as 1 gram).
    grams_per_unit = (
        ('pound', 453.592),
        ('kilogram', 1000.0),
        ('gram', 1.0),
    )
    for unit, factor in grams_per_unit:
        if unit in text:
            # split() with no argument tolerates leading/repeated whitespace.
            return float(text.split()[0]) * factor
    return None
|
|
|
|
def parse_artcraft_price(html):
    """Read the price from an Artcraft product page, in integer cents.

    Uses the text of the first ``span.price-item`` element, strips the
    ``$`` sign, the ``USD`` suffix, spaces and thousands separators, and
    converts the remaining dollar amount to cents.

    Args:
        html: Page markup.

    Returns:
        The price in cents as an int, or ``None`` when the page has no
        ``span.price-item`` element.

    Raises:
        ValueError: if the element text is not a number once cleaned.
    """
    first_match = next(iter(pq(html)('span.price-item').items()), None)
    if first_match is None:
        return None

    dollars = first_match.text()
    # ',' is removed as well so amounts such as "$1,299.00" parse.
    for noise in ('$', ' ', 'USD', ','):
        dollars = dollars.replace(noise, '')
    dollars = dollars.strip()

    # round(), not math.ceil(): binary floats make 1.10 * 100 evaluate to
    # 110.00000000000001, which ceil() would turn into 111 cents.
    return round(float(dollars) * 100.0)
|
|
|
|
def parse_artcraft_product(html):
    """List the variants embedded in an Artcraft product page.

    The text of the ``variant-radios script`` element is parsed as a JSON
    array; each entry's ``id`` is kept and its ``title`` is converted to
    grams with ``parse_artcraft_weight``.

    Args:
        html: Page markup.

    Returns:
        A list of ``{'id': ..., 'weight': ...}`` dicts, one per variant.
    """
    document = pq(html)
    variants_json = document('variant-radios script').text()
    return [
        {
            'id': variant['id'],
            'weight': parse_artcraft_weight(variant['title']),
        }
        for variant in json.loads(variants_json)
    ]
|
|
|
|
async def photographers_formulary(url, chemical):
    """Scrape one Photographers' Formulary page and append a row per size.

    Loads ``url`` in a headless browser, lists the size options with
    ``parse_photographers_formulary_product``, then selects each option in
    turn (radio button click or ``<select>`` value) and reads the displayed
    price with ``parse_photographers_formulary_price``. Each option dict
    gains ``url`` and ``price`` keys and is written out through
    ``photographers_formulary_line``.

    Args:
        url: Product page URL.
        chemical: Chemical name written in the first CSV column.

    The browser is closed in a ``finally`` block so a navigation or parsing
    error does not leave the browser process running.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.setUserAgent(ua)
        print(url)
        await page.goto(url)
        await asyncio.sleep(1)
        data = parse_photographers_formulary_product(await page.content())

        for b in data:
            await asyncio.sleep(2)
            print(b['weight'])
            b['url'] = url

            if b['type'] == 'li':
                # The stored index is 0-based; CSS :nth-of-type() is 1-based.
                index = b['index'] + 1
                await page.click(f'.productOptionViewRadio ul li:nth-of-type({index}) label input')
            elif b['type'] == 'select':
                await page.select('.productOptionViewSelect select', b['value'])
            else:
                # Unknown option type: no price is recorded for it.
                continue

            # Pause after changing the option before re-reading the page
            # (presumably so the displayed price can update -- confirm).
            await asyncio.sleep(2)
            b['price'] = parse_photographers_formulary_price(await page.content())
    finally:
        await browser.close()

    # Rows are written only after every option has been scraped.
    for d in data:
        photographers_formulary_line(chemical, d)
|
|
|
|
async def photographers_formulary_test(url, chemical):
    """Run the Photographers' Formulary product parser against a saved page.

    Prints ``url``, reads ``./test.html`` and prints whatever
    ``parse_photographers_formulary_product`` extracts from it. ``chemical``
    is accepted for signature parity with ``photographers_formulary`` but is
    not used.
    """
    print(url)
    with open('./test.html', 'r') as saved_page:
        markup = saved_page.read()
    print(parse_photographers_formulary_product(markup))
|
|
|
|
def parse_photographers_formulary_product(html):
    """List the size options offered on a Photographers' Formulary page.

    Two layouts are handled:

    * no ``.productOptionViewSelect select`` element: one entry per
      ``.productOptionViewRadio ul li`` item, with ``type`` ``'li'``;
    * exactly one such ``<select>``: one entry per ``<option>`` whose
      ``value`` is not blank, with ``type`` ``'select'`` and the option's
      ``value``.

    Every entry also carries a 0-based ``index`` and a ``weight`` produced
    by ``parse_photographers_formulary_weight`` from the item's label.

    Args:
        html: Page markup.

    Returns:
        A list of option dicts; empty when more than one ``<select>`` is
        found.
    """
    document = pq(html)
    dropdown_count = len(document('.productOptionViewSelect select'))

    if dropdown_count == 0:
        radio_items = document('.productOptionViewRadio ul li').items()
        return [
            {
                'index': position,
                'type': 'li',
                'weight': parse_photographers_formulary_weight(item.find('span').text()),
            }
            for position, item in enumerate(radio_items)
        ]

    if dropdown_count == 1:
        entries = []
        for option in document('.productOptionViewSelect select option').items():
            # Options with a blank value are skipped (presumably the
            # placeholder entry -- confirm).
            if option.attr('value').strip() == '':
                continue
            entries.append({
                # NOTE(review): numbering counts kept options only; the
                # original indentation was lost, confirm this matches.
                'index': len(entries),
                'type': 'select',
                'value': option.attr('value'),
                'weight': parse_photographers_formulary_weight(option.text()),
            })
        return entries

    return []
|
|
|
|
def parse_photographers_formulary_weight(text):
    """Convert a Photographers' Formulary option label into grams.

    The label is split on single spaces; the second token is read as the
    quantity and the third as the unit.

    Args:
        text: Option label, e.g. ``"Size 100 g"``.

    Returns:
        The weight in grams as a float for units ``g`` and ``lb``; ``None``
        for any other unit or when the label has fewer than three tokens
        (for example an empty label).

    Raises:
        ValueError: if the unit is recognised but the quantity token is not
            a number.
    """
    parts = text.split(' ')
    # Short or empty labels used to raise IndexError on parts[2]; treat them
    # like any other unrecognised label.
    if len(parts) < 3:
        return None

    quantity, unit = parts[1], parts[2]
    if unit == 'g':
        return float(quantity)
    if unit == 'lb':
        return float(quantity) * 453.592
    return None
|
|
|
|
def parse_photographers_formulary_price(html):
    """Read the price from a Photographers' Formulary page, in integer cents.

    Uses the text of the first ``em.ProductPrice`` element, strips the
    ``$`` sign and thousands separators, and converts the remaining dollar
    amount to cents.

    Args:
        html: Page markup.

    Returns:
        The price in cents as an int, or ``None`` when the page has no
        ``em.ProductPrice`` element.

    Raises:
        ValueError: if the element text is not a number once cleaned.
    """
    first_match = next(iter(pq(html)('em.ProductPrice').items()), None)
    if first_match is None:
        return None

    # ',' is removed as well so amounts such as "$1,299.00" parse.
    dollars = first_match.text().replace('$', '').replace(',', '').strip()

    # round(), not math.ceil(): binary floats make 1.10 * 100 evaluate to
    # 110.00000000000001, which ceil() would turn into 111 cents.
    return round(float(dollars) * 100.0)
|
|
|
|
def photographers_formulary_line(chemical, v):
    """Append one Photographers' Formulary option to the output CSV.

    The row is also echoed to stdout. Column order is
    ``chemical,url,grams,milliliters,price``; the milliliters column is
    always left empty here.

    Args:
        chemical: Value for the first column.
        v: Option dict providing ``url``, ``weight`` and ``price``.

    NOTE(review): fields are interpolated without CSV quoting, so a value
    containing a comma will shift the columns.
    """
    line = f'{chemical},{v["url"]},{v["weight"]},,{v["price"]}{EOL}'
    print(line)
    # newline='' disables newline translation so EOL (os.linesep) is written
    # verbatim. In default text mode the '\n' inside a Windows '\r\n' would be
    # translated a second time, producing '\r\r\n' line endings.
    with open(csvOutput, 'a', newline='') as file:
        file.write(line)
|
|
|
|
async def main():
    """Scrape every product listed in the sources CSV.

    Parses the optional ``-s/--source`` argument, then reads ``csvFile``
    row by row. Each row supplies ``source, chemical, url`` in its first
    three columns. The header row (first column equal to ``source``) and,
    when ``--source`` is given, rows for other sources are skipped. Rows
    for ``artcraft`` and ``photographers_formulary`` are dispatched to the
    matching scraper; rows for any other source are ignored.
    """
    parser = ArgumentParser(description='Refresh prices from sources')
    parser.add_argument('-s', '--source', type=is_source, required=False, default=None, help='Only run on single source')
    args = parser.parse_args()

    # The output header is not written here: that step was already disabled
    # (commented out) in the original, so scraped rows go to the existing file.

    # Scraper coroutine for each source that has an implementation.
    scrapers = {
        'artcraft': artcraft,
        'photographers_formulary': photographers_formulary,
    }

    with open(csvFile, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        for row in reader:
            # csv.reader yields [] for blank lines; skip those and any other
            # row too short to unpack instead of failing with IndexError.
            if len(row) < 3:
                continue
            source, chemical, url = row[0], row[1], row[2]

            if source == 'source' or (args.source is not None and source != args.source):
                continue

            scraper = scrapers.get(source)
            if scraper is not None:
                await scraper(url, chemical)
|
|
|
|
if __name__ == '__main__':
    # Create the event loop explicitly: calling asyncio.get_event_loop() when
    # no loop is current is deprecated and raises RuntimeError on Python 3.14.
    # The loop is left open afterwards, exactly as before, rather than being
    # closed by asyncio.run().
    # NOTE(review): pyppeteer may reuse the loop for exit-time cleanup --
    # confirm before switching to asyncio.run().
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(main())