Skip to content

Commit d793f01

Browse files
committed
Scraping
1 parent d1034fc commit d793f01

File tree

2 files changed

+30
-24
lines changed

2 files changed

+30
-24
lines changed

README.md

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2382,18 +2382,21 @@ Scraping
23822382
# $ pip3 install requests beautifulsoup4
23832383
import requests
23842384
from bs4 import BeautifulSoup
2385-
url = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
2386-
html = requests.get(url).text
2387-
doc = BeautifulSoup(html, 'html.parser')
2388-
table = doc.find('table', class_='infobox vevent')
2389-
rows = table.find_all('tr')
2390-
link = rows[11].find('a')['href']
2391-
ver = rows[6].find('div').text.split()[0]
2392-
url_i = rows[0].find('img')['src']
2393-
image = requests.get(f'https:{url_i}').content
2394-
with open('test.png', 'wb') as file:
2395-
file.write(image)
2396-
print(link, ver)
2385+
URL = 'https://en.wikipedia.org/wiki/Python_(programming_language)'
2386+
try:
2387+
html = requests.get(URL).text
2388+
doc = BeautifulSoup(html, 'html.parser')
2389+
table = doc.find('table', class_='infobox vevent')
2390+
rows = table.find_all('tr')
2391+
link = rows[11].find('a')['href']
2392+
ver = rows[6].find('div').text.split()[0]
2393+
url_i = rows[0].find('img')['src']
2394+
image = requests.get(f'https:{url_i}').content
2395+
with open('test.png', 'wb') as file:
2396+
file.write(image)
2397+
print(link, ver)
2398+
except requests.exceptions.ConnectionError:
2399+
print("You've got problems with connection.")
23972400
```
23982401

23992402

index.html

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2054,18 +2054,21 @@
20542054
<div><h2 id="scraping"><a href="#scraping" name="scraping">#</a>Scraping</h2><div><h4 id="scrapespythonsurlversionnumberandlogofromwikipediapage">Scrapes Python's URL, version number and logo from Wikipedia page:</h4><pre><code class="python language-python hljs"><span class="hljs-comment"># $ pip3 install requests beautifulsoup4</span>
20552055
<span class="hljs-keyword">import</span> requests
20562056
<span class="hljs-keyword">from</span> bs4 <span class="hljs-keyword">import</span> BeautifulSoup
2057-
url = <span class="hljs-string">'https://en.wikipedia.org/wiki/Python_(programming_language)'</span>
2058-
html = requests.get(url).text
2059-
doc = BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>)
2060-
table = doc.find(<span class="hljs-string">'table'</span>, class_=<span class="hljs-string">'infobox vevent'</span>)
2061-
rows = table.find_all(<span class="hljs-string">'tr'</span>)
2062-
link = rows[<span class="hljs-number">11</span>].find(<span class="hljs-string">'a'</span>)[<span class="hljs-string">'href'</span>]
2063-
ver = rows[<span class="hljs-number">6</span>].find(<span class="hljs-string">'div'</span>).text.split()[<span class="hljs-number">0</span>]
2064-
url_i = rows[<span class="hljs-number">0</span>].find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>]
2065-
image = requests.get(<span class="hljs-string">f'https:<span class="hljs-subst">{url_i}</span>'</span>).content
2066-
<span class="hljs-keyword">with</span> open(<span class="hljs-string">'test.png'</span>, <span class="hljs-string">'wb'</span>) <span class="hljs-keyword">as</span> file:
2067-
file.write(image)
2068-
print(link, ver)
2057+
URL = <span class="hljs-string">'https://en.wikipedia.org/wiki/Python_(programming_language)'</span>
2058+
<span class="hljs-keyword">try</span>:
2059+
html = requests.get(URL).text
2060+
doc = BeautifulSoup(html, <span class="hljs-string">'html.parser'</span>)
2061+
table = doc.find(<span class="hljs-string">'table'</span>, class_=<span class="hljs-string">'infobox vevent'</span>)
2062+
rows = table.find_all(<span class="hljs-string">'tr'</span>)
2063+
link = rows[<span class="hljs-number">11</span>].find(<span class="hljs-string">'a'</span>)[<span class="hljs-string">'href'</span>]
2064+
ver = rows[<span class="hljs-number">6</span>].find(<span class="hljs-string">'div'</span>).text.split()[<span class="hljs-number">0</span>]
2065+
url_i = rows[<span class="hljs-number">0</span>].find(<span class="hljs-string">'img'</span>)[<span class="hljs-string">'src'</span>]
2066+
image = requests.get(<span class="hljs-string">f'https:<span class="hljs-subst">{url_i}</span>'</span>).content
2067+
<span class="hljs-keyword">with</span> open(<span class="hljs-string">'test.png'</span>, <span class="hljs-string">'wb'</span>) <span class="hljs-keyword">as</span> file:
2068+
file.write(image)
2069+
print(link, ver)
2070+
<span class="hljs-keyword">except</span> requests.exceptions.ConnectionError:
2071+
print(<span class="hljs-string">"You've got problems with connection."</span>)
20692072
</code></pre></div></div>
20702073

20712074

0 commit comments

Comments
 (0)