from IPython.core.interactiveshell import InteractiveShell 
InteractiveShell.ast_node_interactivity = "all"


%pip install bs4


import requests

res = requests.get("https://www.example.com")
res.text

'<!doctype html>\n<html>\n<head>\n    <title>Example Domain</title>\n\n    <meta charset="utf-8" />\n    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />\n    <meta name="viewport" content="width=device-width, initial-scale=1" />\n    <style type="text/css">\n    body {\n        background-color: #f0f0f2;\n        margin: 0;\n        padding: 0;\n        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;\n        \n    }\n    div {\n        width: 600px;\n        margin: 5em auto;\n        padding: 2em;\n        background-color: #fdfdff;\n        border-radius: 0.5em;\n        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);\n    }\n    a:link, a:visited {\n        color: #38488f;\n        text-decoration: none;\n    }\n    @media (max-width: 700px) {\n        div {\n            margin: 0 auto;\n            width: auto;\n        }\n    }\n    </style>    \n</head>\n\n<body>\n<div>\n    <h1>Example Domain</h1>\n    <p>This domain is for use in illustrative examples in documents. You may use this\n    domain in literature without prior coordination or asking for permission.</p>\n    <p><a href="https://www.iana.org/domains/example">More information...</a></p>\n</div>\n</body>\n</html>\n'


# BeautifulSoup 객체를 만든다.
# 첫번째 인자는 response의 body를 텍스트로
# 두번째 인자로는 html로 분석한다고 명시

from bs4 import BeautifulSoup

soup = BeautifulSoup(res.text, "html.parser")


# 주피터 상에서는 print()로 출력해야 들여쓰기가 적용된다.
print(soup.prettify())

<!DOCTYPE html>
<html>
 <head>
  <title>
   Example Domain
  </title>
  <meta charset="utf-8"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <style type="text/css">
   body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
  </style>
 </head>
 <body>
  <div>
   <h1>
    Example Domain
   </h1>
   <p>
    This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.
   </p>
   <p>
    <a href="https://www.iana.org/domains/example">
     More information...
    </a>
   </p>
  </div>
 </body>
</html>


# title 태그 가져오기
soup.title

<title>Example Domain</title>


# head 태그 가져오기
soup.head

<head>
<title>Example Domain</title>
<meta charset="utf-8"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-type"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<style type="text/css">
    body {
        background-color: #f0f0f2;
        margin: 0;
        padding: 0;
        font-family: -apple-system, system-ui, BlinkMacSystemFont, "Segoe UI", "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
        
    }
    div {
        width: 600px;
        margin: 5em auto;
        padding: 2em;
        background-color: #fdfdff;
        border-radius: 0.5em;
        box-shadow: 2px 3px 7px 2px rgba(0,0,0,0.02);
    }
    a:link, a:visited {
        color: #38488f;
        text-decoration: none;
    }
    @media (max-width: 700px) {
        div {
            margin: 0 auto;
            width: auto;
        }
    }
    </style>
</head>


# body 태그 가져오기
soup.body

<body>
<div>
<h1>Example Domain</h1>
<p>This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.</p>
<p><a href="https://www.iana.org/domains/example">More information...</a></p>
</div>
</body>


# h1 태그 요소 하나 찾기
soup.h1
soup.find("h1")

<h1>Example Domain</h1>

<h1>Example Domain</h1>


# p 태그 요스'들' 찾기
output = soup.find_all("p")
output[0]
output[1]

<p>This domain is for use in illustrative examples in documents. You may use this
    domain in literature without prior coordination or asking for permission.</p>

<p><a href="https://www.iana.org/domains/example">More information...</a></p>


# 태그 이름 가져오기
h1 = soup.find("h1")
print(h1)
h1.name

<h1>Example Domain</h1>

'h1'


# 태그 내용 가져오기
h1.text

'Example Domain'

일	월	화	수	목	금	토
1	2	3	4	5	6	7
8	9	10	11	12	13	14
15	16	17	18	19	20	21
22	23	24	25	26	27	28
29	30

Selenium의 기초 WebDriver (0)	2023.10.26
요청시 헤더의 수정과 페이지네이션(Pagination) (0)	2023.10.25
스크래핑할 요소의 타게팅 - ID & Class (0)	2023.10.25
스크래핑할 요소의 타게팅 - 태그 (0)	2023.10.25
requests 라이브러리 (0)	2023.10.24

내 블로그 - 관리자 홈 전환	`Q` `Q`
새 글 쓰기	`W` `W`

글 수정 (권한 있는 경우)	`E` `E`
댓글 영역으로 이동	`C` `C`

이 페이지의 URL 복사	`S` `S`
맨 위로 이동	`T` `T`
티스토리 홈 이동	`H` `H`
단축키 안내	`Shift` + `/` `⇧` + `/`

터칭 데이터

터칭 데이터

파싱과 스크래핑의 기본, BeautifulSoup 본문

파싱과 스크래핑의 기본, BeautifulSoup

2-2. 파이썬의 HTML Parser, BeautifulSoup¶

BeautifulSoup 라이브러리¶

BeautifulSoup 객체 만들기¶

'웹 스크래핑(Web scraping)' 카테고리의 다른 글

티스토리툴바

단축키

내 블로그

블로그 게시글

모든 영역