Skip to content

Commit 2b874a5

Browse files
committed
Scraping
1 parent 1ac9f6c commit 2b874a5

File tree

2 files changed

+10
-14
lines changed

2 files changed

+10
-14
lines changed

README.md

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2445,17 +2445,15 @@ Scraping
24452445
#### Scrapes Python's URL, version number and logo from its Wikipedia page:
24462446
```python
24472447
# $ pip3 install requests beautifulsoup4
2448-
import requests, sys
2449-
from bs4 import BeautifulSoup
2448+
import requests, bs4, sys
24502449
URL = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
24512450
try:
24522451
html = requests.get(URL).text
2453-
doc = BeautifulSoup(html, 'html.parser')
2452+
doc = bs4.BeautifulSoup(html, 'html.parser')
24542453
table = doc.find('table', class_='infobox vevent')
2455-
rows = table.find_all('tr')
2456-
link = rows[11].find('a')['href']
2457-
ver = rows[6].find('div').text.split()[0]
2458-
url_i = rows[0].find('img')['src']
2454+
link = table.find('th', text='Website').next_sibling.a['href']
2455+
ver = table.find('th', text='Stable release').next_sibling.strings.__next__()
2456+
url_i = table.find('img')['src']
24592457
image = requests.get(f'https:{url_i}').content
24602458
with open('test.png', 'wb') as file:
24612459
file.write(image)

index.html

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2140,17 +2140,15 @@
21402140
<li><strong><code class="python hljs"><span class="hljs-string">'&lt;str&gt;'</span></code> - Max age as a string: <code class="python hljs"><span class="hljs-string">'1 week, 3 days'</span></code>, <code class="python hljs"><span class="hljs-string">'2 months'</span></code>, …</strong></li>
21412141
</ul>
21422142
<div><h2 id="scraping"><a href="#scraping" name="scraping">#</a>Scraping</h2><div><h4 id="scrapespythonsurlversionnumberandlogofromitswikipediapage">Scrapes Python's URL, version number and logo from its Wikipedia page:</h4><pre><code class="python language-python hljs"><span class="hljs-comment"># $ pip3 install requests beautifulsoup4</span>
2143-
<span class="hljs-keyword">import</span> requests, sys
2144-
<span class="hljs-keyword">from</span> bs4 <span class="hljs-keyword">import</span> BeautifulSoup
2143+
<span class="hljs-keyword">import</span> requests, bs4, sys
21452144
URL = <span class="hljs-string">'https://en.wikipedia.org/wiki/Python_(programming_language)'</span>
21462145
<span class="hljs-keyword">try</span>:
21472146
html = requests.get(URL).text
2148-
doc = BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>)
2147+
doc = bs4.BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>)
21492148
table = doc.find(<span class="hljs-string">'table'</span>, class_=<span class="hljs-string">'infobox vevent'</span>)
2150-
rows = table.find_all(<span class="hljs-string">'tr'</span>)
2151-
link = rows[<span class="hljs-number">11</span>].find(<span class="hljs-string">'a'</span>)[<span class="hljs-string">'href'</span>]
2152-
ver = rows[<span class="hljs-number">6</span>].find(<span class="hljs-string">'div'</span>).text.split()[<span class="hljs-number">0</span>]
2153-
url_i = rows[<span class="hljs-number">0</span>].find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>]
2149+
link = table.find(<span class="hljs-string">'th'</span>, text=<span class="hljs-string">'Website'</span>).next_sibling.a[<span class="hljs-string">'href'</span>]
2150+
ver = table.find(<span class="hljs-string">'th'</span>, text=<span class="hljs-string">'Stable release'</span>).next_sibling.strings.__next__()
2151+
url_i = table.find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>]
21542152
image = requests.get(<span class="hljs-string">f'https:<span class="hljs-subst">{url_i}</span>'</span>).content
21552153
<span class="hljs-keyword">with</span> open(<span class="hljs-string">'test.png'</span>, <span class="hljs-string">'wb'</span>) <span class="hljs-keyword">as</span> file:
21562154
file.write(image)

0 commit comments

Comments
 (0)