웹스크래핑

In [92]:
from selenium import webdriver
from bs4 import BeautifulSoup
import re
def cleanText(readData):
 
    #텍스트에 포함되어 있는 특수 문자 제거
 
    text = re.sub('[-=+,#/\?:^$.@*\"※~&%ㆍ!』\\‘|\(\)\[\]\<\>`\'…》]', '', readData)
 
    return text
driver = webdriver.Chrome('C:/Users/User/Downloads/chromedriver')
#웹 자원 로드를 위해 3초 대기
driver.implicitly_wait(3)
#url접근
keyword = "주식"
URL = 'https://search.naver.com/search.naver?where=news&sm=tab_jum&query='
driver.get('https://search.naver.com/search.naver?where=news&sm=tab_jum&query='+keyword)
#url=driver.find_element_by_css_selector('a._sp_each_title').text
title = driver.find_element_by_css_selector('a._sp_each_title').text
print(title)
print(URL)
 
KB금융, 푸르덴셜생명 2.3조에 인수…주식매매계약 체결(종합2보)
https://search.naver.com/search.naver?where=news&sm=tab_jum&query=
In [93]:
#print(driver)
import urllib.request
selected_tag_a = driver.find_element_by_tag_name('a')
selected_link = driver.find_elements_by_partial_link_text('')
soup = BeautifulSoup(driver.page_source, 'lxml')
resp = urllib.request.urlopen(URL)
soup1 = BeautifulSoup(resp,from_encoding=resp.info().get_param('charset'), features='html.parser')
items = soup.select('a._sp_each_title')
d1 = soup.select('dt')
url = soup.select('a._sp_each_url')
source_time = soup.select('dd.txt_inline')
def rr(data):
    result = re.sub("<.+?>", "", str(data))
    return result

#OMG = re.sub("<.+?>", "", str(items))
OMG = rr(url)
OMG2 = rr(source_time)
print(OMG2)
print(OMG)
test1 = soup1.find_all('a', herf=True)
#import pandas as pd
#data = {'name' : OMG, 'time_souce' : OMG2}
#pd.DataFrame(data)
print(test1)
print(resp)
 
[연합뉴스언론사 선정  1일 전  네이버뉴스   보내기  , 서울경제언론사 선정  9시간 전  네이버뉴스   보내기  , 중앙일보언론사 선정  3시간 전  네이버뉴스   보내기  , 조선일보언론사 선정  2일 전  네이버뉴스   보내기  , 파이낸셜뉴스언론사 선정  1일 전  네이버뉴스   보내기  , 아이뉴스24언론사 선정  5시간 전  네이버뉴스   보내기  , KBS  1일 전  네이버뉴스   보내기  , MBC  1일 전  네이버뉴스   보내기  , EBN  1일 전   보내기  , 뉴스핌  1일 전   보내기  ]
[네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, 네이버뉴스, KB금융 '푸르덴셜생명보험' 주식매매계약 체결, 대신證 "국내외 주식 1억이상 거래하고, 축하금 받자"]
[]
<http.client.HTTPResponse object at 0x00000254F3886CC0>
In [133]:
import urllib.request
from bs4 import BeautifulSoup

url = "https://search.naver.com/search.naver?where=news&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D"
req = urllib.request.Request(url)
sourcecode = urllib.request.urlopen(url).read()
soup = BeautifulSoup(sourcecode, "html.parser")
#aa = soup.find("div", class_ = "news mynews section _prs_nws").find_all("li")
#print(aa[3].find("a")["href"])
aaa = [0]
#for href in soup.find("div", class_ = "news mynews section _prs_nws").find_all("li"):
	#print(href.get('href'))
    
for href in soup.find_all("a"):
    print(href.get('href'))
 
#lnb
#content
https://www.naver.com
#
https://help.naver.com/support/alias/search/word/word_16.naver
#
#
https://nid.naver.com/nidlogin.login?url=https%3A%2F%2Fsearch.naver.com%2Fsearch.naver%3Fwhere%3Dnews%26sm%3Dtab_jum%26query%3D%25EC%25A3%25BC%25EC%258B%259D
https://help.naver.com/support/alias/search/word/word_16.naver
https://help.naver.com/support/alias/search/word/word_21.naver
https://help.naver.com/support/alias/search/word/word_17.naver
https://help.naver.com/support/alias/search/word/word_18.naver
javascript:;
javascript:;
https://help.naver.com/support/alias/search/word/word_17.naver
https://help.naver.com/support/alias/search/word/word_18.naver
javascript:;
javascript:;
https://help.naver.com/support/alias/search/word/word_17.naver
https://help.naver.com/support/alias/search/word/word_18.naver
javascript:;
javascript:;
https://help.naver.com/support/alias/search/word/word_17.naver
https://help.naver.com/support/alias/search/word/word_18.naver
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
#
?where=nexearch&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D
?where=news&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D
https://dict.naver.com/search.nhn?dicQuery=%EC%A3%BC%EC%8B%9D&query=%EC%A3%BC%EC%8B%9D&target=dic&query_utf=&isOnlyViewEE=
?where=article&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D
?where=realtime&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D
?where=image&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D
https://book.naver.com/search/search.nhn?query=%EC%A3%BC%EC%8B%9D
?where=post&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D
#
?where=kin&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D
?where=kdic&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D
?where=webkr&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D
?where=video&sm=tab_jum&query=%EC%A3%BC%EC%8B%9D
https://search.shopping.naver.com/search/all.nhn?where=all&frm=NVSCTAB&query=%EC%A3%BC%EC%8B%9D
https://map.naver.com/v5/search/%EC%A3%BC%EC%8B%9D
https://m.post.naver.com/search/post.nhn?keyword=%EC%A3%BC%EC%8B%9D
https://vibe.naver.com/search?query=%EC%A3%BC%EC%8B%9D
https://academic.naver.com/search.naver?field=0&query=%EC%A3%BC%EC%8B%9D
https://audioclip.naver.com/search/all?keyword=%EC%A3%BC%EC%8B%9D
#
https://help.naver.com/support/alias/search/integration/integration_1.naver
https://help.naver.com/support/alias/search/integration/integration_2.naver
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
#
https://help.naver.com/support/alias/search/integration/news2.naver
#order_cat
#order_abc
#
#
https://help.naver.com/support/alias/search/integration/integration_4.naver
#
#
#
#
#
#
#
http://yna.kr/AKR20200410041052002?did=1195m
http://yna.kr/AKR20200410041052002?did=1195m
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=001&aid=0011539571
#
http://www.newsis.com/view/?id=NISX20200410_0000990286&cID=10404&pID=10400
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=003&aid=0009809215
http://ilyo.co.kr/?ac=article_view&entry_id=366823
https://www.vop.co.kr/A00001481268.html
#
https://www.sedaily.com/NewsView/1Z1FM2MBWX
https://www.sedaily.com/NewsView/1Z1FM2MBWX
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=011&aid=0003723145
#
http://www.newspim.com/news/view/20200410001105
https://news.joins.com/article/olink/23346890
https://news.joins.com/article/olink/23346890
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=025&aid=0002992034
#
https://news.chosun.com/site/data/html_dir/2020/04/09/2020040901990.html?utm_source=naver&utm_medium=original&utm_campaign=news
https://news.chosun.com/site/data/html_dir/2020/04/09/2020040901990.html?utm_source=naver&utm_medium=original&utm_campaign=news
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=023&aid=0003522288
#
http://yna.kr/AKR20200409046200003?did=1195m
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=001&aid=0011534925
https://biz.chosun.com/site/data/html_dir/2020/04/09/2020040902083.html?utm_source=naver&utm_medium=original&utm_campaign=biz
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=366&aid=0000507239
http://www.dt.co.kr/contents.html?article_no=2020040902109932781006&ref=naver
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=029&aid=0002592977
http://news.khan.co.kr/kh_news/khan_art_view.html?artid=202004091544001&code=920101
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=032&aid=0003002790
#
http://www.fnnews.com/news/202004101440328115
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=014&aid=0004406408
#
http://www.cnbnews.com/news/article.html?no=443020
http://www.cnews.co.kr/uhtml/read.jsp?idxno=202004101515039580888
http://www.inews24.com/view/1257226
http://www.inews24.com/view/1257226
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=031&aid=0000533897
#
http://news.kbs.co.kr/news/view.do?ncd=4422243&ref=A
http://news.kbs.co.kr/news/view.do?ncd=4422243&ref=A
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=056&aid=0010817646
#
https://www.seoul.co.kr/news/newsView.php?id=20200410500109&wlog_tag3=naver
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=081&aid=0003081491
http://www.viva100.com/main/view.php?key=20200410010004020
http://yna.kr/AKR20200410063600002?did=1195m
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=004&oid=001&aid=0011538924
http://www.kukinews.com/news/article.html?no=775370
#
https://imnews.imbc.com/news/2020/econo/article/5718850_32647.html
https://imnews.imbc.com/news/2020/econo/article/5718850_32647.html
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=214&aid=0001030053
#
https://www.sedaily.com/NewsView/1Z1F6BKVD7
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=011&aid=0003723041
http://www.ebn.co.kr/news/view/1423890/?sc=naver
http://www.ebn.co.kr/news/view/1423890/?sc=naver
#
https://view.asiae.co.kr/article/2020041014365687672
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=277&aid=0004659543
http://www.enewstoday.co.kr/news/articleView.html?idxno=1380591
http://www.busan.com/view/busan/view.php?code=2020041016065137342
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=082&aid=0000995401
http://www.weeklytoday.com/news/articleView.html?idxno=168815
#
http://www.newspim.com/news/view/20200410000854
http://www.newspim.com/news/view/20200410000854
#
http://www.shinailbo.co.kr/news/articleView.html?idxno=1268846
http://news.mt.co.kr/mtview.php?no=2020041013231790945
https://news.naver.com/main/read.nhn?mode=LSD&mid=sec&sid1=101&oid=008&aid=0004392630
http://www.ebn.co.kr/news/view/1423906/?sc=naver
http://www.newsworks.co.kr/news/articleView.html?idxno=447290
#
?&where=news&query=%EC%A3%BC%EC%8B%9D&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=33&start=11
?&where=news&query=%EC%A3%BC%EC%8B%9D&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=33&start=21
?&where=news&query=%EC%A3%BC%EC%8B%9D&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=33&start=31
?&where=news&query=%EC%A3%BC%EC%8B%9D&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=33&start=41
?&where=news&query=%EC%A3%BC%EC%8B%9D&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=33&start=51
?&where=news&query=%EC%A3%BC%EC%8B%9D&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=33&start=61
?&where=news&query=%EC%A3%BC%EC%8B%9D&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=33&start=71
?&where=news&query=%EC%A3%BC%EC%8B%9D&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=33&start=81
?&where=news&query=%EC%A3%BC%EC%8B%9D&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=33&start=91
?&where=news&query=%EC%A3%BC%EC%8B%9D&sm=tab_pge&sort=0&photo=0&field=0&reporter_article=&pd=0&ds=&de=&docid=&nso=so:r,p:all,a:all&mynews=0&cluster_rank=33&start=11
https://help.naver.com/support/alias/news/news_21.naver
#
#
#
#
#
#
#
#
#
#
#
#
#
https://help.naver.com/support/alias/search/word/word_1.naver
?where=news&query=%EC%B2%AD%EB%8B%B4%EB%8F%99%EC%A3%BC%EC%8B%9D%EB%B6%80%EC%9E%90%EC%9D%B4%ED%9D%AC%EC%A7%84&ie=utf8&sm=tab_she&qdt=0
?where=news&query=%EC%9E%84%EC%A7%80%EC%9B%90&ie=utf8&sm=tab_she&qdt=0
?where=news&query=%EA%B5%AD%EB%82%B4%EC%A3%BC%EC%8B%9D%ED%98%95%ED%8E%80%EB%93%9C&ie=utf8&sm=tab_she&qdt=0
?where=news&query=%EC%BD%94%EC%8A%A4%ED%94%BC%EC%95%BC%EA%B0%84%EC%84%A0%EB%AC%BC&ie=utf8&sm=tab_she&qdt=0
?where=news&query=%EB%AF%B8%EA%B5%AD%EC%A3%BC%EC%8B%9D%ED%88%AC%EC%9E%90&ie=utf8&sm=tab_she&qdt=0
?where=news&query=2019%EC%A3%BC%EC%8B%9D&ie=utf8&sm=tab_she&qdt=0
?where=news&query=%EC%9E%90%EC%82%B0%EA%B4%80%EB%A6%AC&ie=utf8&sm=tab_she&qdt=0
?where=news&query=%EC%A3%BC%EC%8B%9D%EC%A0%95%EB%B3%B4&ie=utf8&sm=tab_she&qdt=0
?where=news&query=%EB%8B%AC%EB%9F%AC%EC%A0%84%EB%A7%9D&ie=utf8&sm=tab_she&qdt=0
?where=news&query=%EC%9D%B4%EB%8F%99%ED%8F%89%EA%B7%A0%EC%84%A0&ie=utf8&sm=tab_she&qdt=0
#
#
https://help.naver.com/support/alias/search/word/word_2.naver
#
#
#
?where=nexearch&sm=tab_htk.nws&ie=utf8&query=%EC%98%A4%ED%9B%84+4%EC%8B%9C+%ED%88%AC%ED%91%9C%EC%9C%A8+23.46%25
?where=nexearch&sm=tab_htk.nws&ie=utf8&query=%EC%98%A4%ED%9B%84+3%EC%8B%9C+%EB%88%84%EC%A0%81%ED%88%AC%ED%91%9C%EC%9C%A8+21.95%25
?where=nexearch&sm=tab_htk.nws&ie=utf8&query=%EC%BD%94%EB%A1%9C%EB%82%9819+%EA%B2%80%EC%82%AC
?where=nexearch&sm=tab_htk.nws&ie=utf8&query=%EC%B4%9D%EC%84%A0+%EC%82%AC%EC%A0%84%ED%88%AC%ED%91%9C
?where=nexearch&sm=tab_htk.nws&ie=utf8&query=%EC%BD%94%EB%A1%9C%EB%82%9819+%EB%B0%B1%EC%8B%A0
?where=nexearch&sm=tab_htk.nws&ie=utf8&query=%EC%A1%B0%EC%9A%A9%ED%95%9C+%EC%A0%84%ED%8C%8C
?where=nexearch&sm=tab_htk.nws&ie=utf8&query=%EC%82%AC%EC%A0%84%ED%88%AC%ED%91%9C+%EB%A7%88%EC%A7%80%EB%A7%89%EB%82%A0
?where=nexearch&sm=tab_htk.nws&ie=utf8&query=%EA%B5%AD%EB%AF%BC+100%EB%AA%85%EB%8B%B9+1%EB%AA%85%EA%BC%B4
?where=nexearch&sm=tab_htk.nws&ie=utf8&query=%EC%82%AC%EC%A0%84%ED%88%AC%ED%91%9C+%EC%97%B4%EA%B8%B0
?where=nexearch&sm=tab_htk.nws&ie=utf8&query=%EB%A7%88%EC%A7%80%EB%A7%89+%EC%A3%BC%EB%A7%90
?where=nexearch&sm=tab_htk.ent&ie=utf8&query=%EC%9D%8C%EC%95%85%EC%A4%91%EC%8B%AC+%EC%9E%84%EC%98%81%EC%9B%85
?where=nexearch&sm=tab_htk.ent&ie=utf8&query=%EA%B9%80%EC%B0%BD%EC%98%A5+%EC%87%BC+%EA%B9%80%ED%98%B8%EC%A4%91
?where=nexearch&sm=tab_htk.ent&ie=utf8&query=2%EA%B5%B0+%EC%84%A0%EC%88%98+%EB%B0%9C%EC%97%B4%EB%A1%9C+%ED%9B%88%EB%A0%A8+%EC%A4%91%EB%8B%A8
?where=nexearch&sm=tab_htk.ent&ie=utf8&query=%ED%95%98%EC%9D%B4%EC%97%90%EB%82%98+%EA%B9%80%ED%98%9C%EC%88%98
?where=nexearch&sm=tab_htk.ent&ie=utf8&query=%ED%95%98%EC%9D%B4%EC%97%90%EB%82%98+%EC%A2%85%EC%98%81+%EC%86%8C%EA%B0%90
?where=nexearch&sm=tab_htk.ent&ie=utf8&query=%EC%9C%A0%EB%B3%84%EB%82%98+%EB%AC%B8%EC%85%B0%ED%94%84+%EC%97%90%EB%A6%AD
?where=nexearch&sm=tab_htk.ent&ie=utf8&query=%EC%B6%9C%EC%97%B0+%EC%97%86%EC%9D%B4+1%EC%9C%84
?where=nexearch&sm=tab_htk.ent&ie=utf8&query=%EB%B0%98%EC%9D%98%EB%B0%98+%EB%AA%85%EC%84%B8%EB%B9%88
?where=nexearch&sm=tab_htk.ent&ie=utf8&query=%EB%8B%B9%EB%82%98%EA%B7%80+%EA%B7%80+%EB%B0%95%EC%84%B1%EA%B4%91
?where=nexearch&sm=tab_htk.ent&ie=utf8&query=%EC%9C%84%EB%84%88+Remember
https://help.naver.com/support/alias/search/word/word_3.naver
http://newssearch.naver.com/search.naver?where=rss&query=%EC%A3%BC%EC%8B%9D&field=0&nx_search_query=&nx_and_query=&nx_sub_query=&nx_search_hlquery=&is_dts=0
#
#
https://help.naver.com/support/alias/search/word/word_17.naver
https://help.naver.com/support/alias/search/word/word_18.naver
javascript:;
javascript:;
https://help.naver.com/support/alias/search/word/word_16.naver
#
#
https://nid.naver.com/nidlogin.login?url=https%3A%2F%2Fsearch.naver.com%2Fsearch.naver%3Fwhere%3Dnews%26sm%3Dtab_jum%26query%3D%25EC%25A3%25BC%25EC%258B%259D
https://help.naver.com/support/alias/search/word/word_16.naver
https://help.naver.com/support/alias/search/word/word_21.naver
https://help.naver.com/support/alias/search/word/word_17.naver
https://help.naver.com/support/alias/search/word/word_18.naver
javascript:;
javascript:;
https://help.naver.com/support/alias/search/word/word_17.naver
https://help.naver.com/support/alias/search/word/word_18.naver
javascript:;
javascript:;
https://help.naver.com/support/alias/search/word/word_17.naver
https://help.naver.com/support/alias/search/word/word_18.naver
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
javascript:;
#
https://searchad.naver.com/
https://news.naver.com/main/ombudsman/searchAlliance.nhn
https://help.naver.com/support/alias/search/footer/news.naver
https://help.naver.com/support/alias/report/unsound.naver
https://www.navercorp.com/

+ Recent posts