import datetime, re
import pymysql
import json
from urllib.request import urlopen
from bs4 import BeautifulSoup
# Placeholder for the API key; the import loop further down splices it into
# the request URL of the food-safety open API.
key = '제공받은 키'

# Open the MySQL connection (host and password are placeholders), then
# switch the session to the `foodvar` schema through the cursor.
conn = pymysql.connect(
    host='db 주소',
    port=3306,
    user='root',
    passwd='db 루트의 비번',
    db='mysql',
    charset='utf8',
)
cur = conn.cursor()
cur.execute("USE foodvar")

# Disabled code: the bare string literal below is never executed. It holds
# a Wikipedia crawler (store / getLinks) that wrote each page's title and
# first paragraph into a `pages` table. It calls random.randint, but
# `random` is not among the imports at the top of this file.
"""
def store(title, content) :
cur.execute(
"INSERT INTO pages(title, content) values (\"%s\", \"%s\")", (title,content)
)
cur.connection.commit()
def getLinks(articleUrl) :
html = urlopen("http://en.wikipedia.org"+ articleUrl)
bsObj = BeautifulSoup(html, "html.parser")
# title = bsObj.find("h1").find("span").get_text()
title = bsObj.find("h1").get_text()
content = bsObj.find("div", {"id": "mw-content-text"}).find("p").get_text()
store(title, content)
return bsObj.find("div", {"id": "bodyContent"}).findAll("a",
href = re.compile("^(/wiki/)((?!:).)*$"))
links = getLinks("/wiki/Donald_Trump")
try:
while len(links) > 0:
newArticle = links[random.randint(0, len(links)-1)].attrs["href"]
print(newArticle)
links = getLinks(newArticle)
finally:
cur.close()
conn.close()
"""
# Import the I2790 records from the food-safety open API into the `nutri`
# table, requesting one 1000-row window per iteration (1-1000, 1001-2000, ...).

# JSON keys of one API record, listed in the same order as the `nutri`
# columns named in INSERT_SQL.
FIELD_KEYS = (
    "NUM",
    "FOOD_CD",
    "GROUP_NAME",
    "DESC_KOR",
    "SERVING_SIZE",
    "NUTR_CONT1",
    "NUTR_CONT2",
    "NUTR_CONT3",
    "NUTR_CONT4",
    "NUTR_CONT5",
    "NUTR_CONT6",
    "NUTR_CONT7",
    "NUTR_CONT8",
    "NUTR_CONT9",
)

# The placeholders are deliberately NOT wrapped in quotes: PyMySQL quotes and
# escapes every parameter itself. The earlier "\"%s\"" form stored each value
# with literal single quotes around it and turned None into the text NULL.
INSERT_SQL = (
    "INSERT INTO nutri(num, code, genre, name, size, kcal, carbs, prot, "
    "fat, sugar, nat, chole, satur, trans) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
)

API_BASE = 'https://openapi.foodsafetykorea.go.kr/api/' + key + '/I2790/json/'

startRow = 1    # first row index of the window being requested
endRow = 1000   # 59886 -- not read in this section; kept in case other code uses it
tmp = 1000      # last row index of the window being requested

try:
    for i in range(60):
        url = API_BASE + str(startRow) + '/' + str(tmp)
        # Close the HTTP response as soon as the body has been read.
        with urlopen(url) as response:
            res = response.read().decode('utf-8')

        # A reply without an "I2790" object or without a "row" list has
        # nothing to insert (presumably an error reply or a window past the
        # last record -- TODO confirm against the API spec). Report it and
        # stop instead of failing on None.
        responseJson = (json.loads(res).get("I2790") or {}).get("row")
        if not responseJson:
            print("No rows returned for " + str(startRow) + "-" + str(tmp)
                  + "; stopping.")
            break

        # One batched insert and one commit per fetched page.
        cur.executemany(
            INSERT_SQL,
            [tuple(row.get(field) for field in FIELD_KEYS)
             for row in responseJson],
        )
        conn.commit()

        startRow = tmp + 1
        tmp += 1000
finally:
    # Release the cursor and connection even when a request or insert fails.
    cur.close()
    conn.close()
# Disabled manual check: requests rows 1-2 and prints the DESC_KOR value of
# the first returned row.
#url = 'https://openapi.foodsafetykorea.go.kr/api/' + key + '/I2790/json/' + str(1) + '/' + str(2)
#res = urlopen(url).read().decode('utf-8')
#responseJson = json.loads(res)
#print(responseJson.get("I2790").get("row")[0].get("DESC_KOR"))
# Field reference for the I2790 record keys read by the import loop, kept as
# a no-op string literal. Each line holds a number (presumably the field's
# position in the API specification -- TODO confirm), the JSON key, and its
# Korean label. English glosses of the labels:
#   NUM           record number
#   FOOD_CD       food code
#   GROUP_NAME    food group
#   DESC_KOR      food name
#   SERVING_SIZE  total content amount
#   NUTR_CONT1    energy (kcal), per serving
#   NUTR_CONT2    carbohydrate (g), per serving
#   NUTR_CONT3    protein (g), per serving
#   NUTR_CONT4    fat (g), per serving
#   NUTR_CONT5    sugars (g), per serving
#   NUTR_CONT6    sodium (mg), per serving
#   NUTR_CONT7    cholesterol (mg), per serving
#   NUTR_CONT8    saturated fatty acids (g), per serving
#   NUTR_CONT9    trans fat (g), per serving
"""
1 NUM 번호
2 FOOD_CD 식품코드
7 GROUP_NAME 식품군
8 DESC_KOR 식품이름
12 SERVING_SIZE 총내용량
13 NUTR_CONT1 열량(kcal)(1회제공량당)
14 NUTR_CONT2 탄수화물(g)(1회제공량당)
15 NUTR_CONT3 단백질(g)(1회제공량당)
16 NUTR_CONT4 지방(g)(1회제공량당)
17 NUTR_CONT5 당류(g)(1회제공량당)
18 NUTR_CONT6 나트륨(mg)(1회제공량당)
19 NUTR_CONT7 콜레스테롤(mg)(1회제공량당)
20 NUTR_CONT8 포화지방산(g)(1회제공량당)
21 NUTR_CONT9 트랜스지방(g)(1회제공량당)
"""
# NOTE: the stray text "동방프로젝트" ("Touhou Project") stood here as a bare
# expression. Python evaluated it as an undefined name, so the script ended
# with a NameError after the import had finished; it is kept only as a comment.