File tree Expand file tree Collapse file tree 2 files changed +73
-0
lines changed Expand file tree Collapse file tree 2 files changed +73
-0
lines changed Original file line number Diff line number Diff line change
1
<!DOCTYPE html>
<!-- Minimal sample page: one heading, two paragraphs (the first has
     class "subtitle"), and a four-item unordered list. -->
<html lang="en" dir="ltr">
  <head>
    <meta charset="utf-8">
    <title></title>
  </head>
  <body>
    <h1>This is a title</h1>

    <p class="subtitle">Lorem ipsum dolor sit amet, consectetur adipisicing elit. </p>
    <p>Here's another p without a class </p>

    <ul>
      <li>Rolf</li>
      <li>Charlie</li>
      <li>Jen</li>
      <li>Jose</li>
    </ul>
  </body>
</html>
Original file line number Diff line number Diff line change
1
+ from bs4 import BeautifulSoup
2
+
3
# Sample document used by the lookup functions in this module: one <h1>,
# two <p> elements (only the first has class "subtitle") and a four-item <ul>.
SIMPLE_HTML = '''<!DOCTYPE html>
<html lang="en" dir="ltr">
<head>
<meta charset="utf-8">
<title></title>
</head>
<body>
<h1>This is a title</h1>

<p class="subtitle">Lorem ipsum dolor sit amet, consectetur adipisicing elit. </p>
<p>Here's another p without a class </p>

<ul>
<li>Rolf</li>
<li>Charlie</li>
<li>Jen</li>
<li>Jose</li>
</ul>
</body>
</html>
'''

# Parse the sample once at import time; the functions in this module all
# query this shared tree.
simple_soup = BeautifulSoup(markup=SIMPLE_HTML, features='html.parser')
26
+
27
+
28
def find_title():
    """Print the ``.string`` of the first ``<h1>`` tag found in ``simple_soup``."""
    print(simple_soup.find('h1').string)
31
+
32
+
33
def find_list_items():
    """Print a list with the ``.string`` of every ``<li>`` tag in ``simple_soup``."""
    item_texts = []
    for li_tag in simple_soup.find_all('li'):
        item_texts.append(li_tag.string)
    print(item_texts)
37
+
38
+
39
def find_subtitle():
    """Print the ``.string`` of the first ``<p>`` tag whose class is ``subtitle``."""
    subtitle_tag = simple_soup.find('p', attrs={'class': 'subtitle'})
    print(subtitle_tag.string)
42
+
43
+
44
def find_other_paragraph():
    """Print the ``.string`` of the first ``<p>`` tag without the ``subtitle`` class."""
    remaining = []
    for tag in simple_soup.find_all('p'):
        # A tag with no class attribute has no 'class' key, so fall back
        # to an empty list for the membership test.
        css_classes = tag.attrs.get('class', [])
        if 'subtitle' in css_classes:
            continue
        remaining.append(tag)
    # Indexing (rather than a default) keeps the IndexError when nothing matches.
    print(remaining[0].string)
48
+
49
+
50
# Run every demo once, in this fixed order; each call prints its own result.
for _demo in (find_list_items, find_title, find_subtitle, find_other_paragraph):
    _demo()
You can’t perform that action at this time.
0 commit comments