Сборник IT новостей про Python. Самые актуальные новости про Python на одной странице.
Читайте нас через Telegram @py_digest, RSS
Попробуйте наш тренажер IT инцидентов https://app.incidenta.tech. Вы научитесь диагностировать самые популярные сбои в IT.
""" + +ANNOUNCEMENT_DEFAULT = """#python #pydigest +IT-новости про Python перед вами. + +Часть материалов из выпуска Python Дайджест: + +links + +Заходите в гости - {digest_url} +""" + + class Section(models.Model): """ Section is a category of news-item @@ -200,7 +237,7 @@ class Meta: verbose_name_plural = _("Resources") -class Item(models.Model): +class Item(models.Model, ModelMeta): """ Item is a content, is a link """ @@ -213,9 +250,7 @@ class Item(models.Model): on_delete=models.CASCADE, ) title = models.CharField(verbose_name=_("Title"), max_length=255) - is_editors_choice = models.BooleanField( - verbose_name=_("Is editors choice"), default=True - ) + is_editors_choice = models.BooleanField(verbose_name=_("Is editors choice"), default=True) description = models.TextField(verbose_name=_("Description"), blank=True) issue = models.ForeignKey( Issue, @@ -233,7 +268,10 @@ class Item(models.Model): ) link = models.URLField(verbose_name=_("URL"), max_length=255) additionally = models.CharField( - verbose_name=_("Additional info"), max_length=255, blank=True, null=True + verbose_name=_("Additional info"), + max_length=255, + blank=True, + null=True, ) related_to_date = models.DateField( verbose_name=_("Date"), @@ -253,9 +291,7 @@ class Item(models.Model): default=ITEM_LANGUAGE_DEFAULT, ) created_at = models.DateField(verbose_name=_("Created date"), auto_now_add=True) - modified_at = models.DateTimeField( - verbose_name=_("modified date"), null=True, blank=True - ) + modified_at = models.DateTimeField(verbose_name=_("modified date"), null=True, blank=True) activated_at = models.DateTimeField( verbose_name=_("Activated date"), default=datetime.datetime.now, @@ -273,12 +309,19 @@ class Item(models.Model): article_path = models.FilePathField( verbose_name=_("Article path"), blank=True, - path=settings.DATASET_ROOT, + path=settings.PAGES_ROOT, ) tags = TaggableManager(blank=True) - keywords = TaggableManager( - verbose_name=_("Keywords"), through=KeywordGFK, blank=True - ) + keywords = TaggableManager(verbose_name=_("Keywords"), through=KeywordGFK, blank=True) + + _metadata = { + "title": "title", + "description": "meta_description", + "published_time": "activated_at", + "modified_time": "modified_at", + "locale": "meta_locale", + "url": "meta_link", + } class Meta: verbose_name = _("News") @@ -298,10 +341,16 @@ def save(self, *args, **kwargs): old_issue = Issue.objects.latest("date_to") cnt_issue = int(old_issue.title.replace("Выпуск ", "")) + 1 new_issue = Issue( - title="Выпуск %s" % cnt_issue, + title=f"Выпуск {cnt_issue}", date_from=date_from, date_to=date_to, + published_at=date_to + datetime.timedelta(days=1), + description=ISSUE_DESCRIPTION_DEFAULT, + announcement=ANNOUNCEMENT_DEFAULT.format( + digest_url=f"https://pythondigest.ru/issue/{cnt_issue}/" + ), ) + new_issue.save() self.issue = new_issue elif issue.count() == 1: @@ -322,8 +371,25 @@ def save_without_signals(self): self.save() self._disable_signals = False + @property + def meta_description(self): + return self.description[:300] + + @property + def meta_locale(self): + if self.language == "ru": + return "ru_RU" + return "en_US" + + @property + def meta_link(self): + return reverse("digest:item", kwargs={"pk": self.pk}) + @property def cls_check(self): + if not settings.CLS_ENABLED: + return 0 + try: item = ItemClsCheck.objects.get(item=self) item.check_cls() @@ -345,36 +411,52 @@ def link_type(self): @property def text(self): - nonempty_path = self.article_path is not None and self.article_path - if nonempty_path and os.path.exists(self.article_path): + if self.is_exists_text: with open(self.article_path) as fio: - result = fio.read() - else: + return fio.read() + + try: + resp = requests.get(self.link, timeout=15) + text = resp.text.strip() + if not text: + return text try: - resp = requests.get(self.link) - text = resp.text - try: - result = Document( - text, - min_text_length=50, - positive_keywords=",".join(settings.DATASET_POSITIVE_KEYWORDS), - negative_keywords=",".join(settings.DATASET_NEGATIVE_KEYWORDS), - ).summary() - except Unparseable: - result = text - except ( - KeyError, - requests.exceptions.RequestException, - requests.exceptions.Timeout, - requests.exceptions.TooManyRedirects, - ) as e: - result = "" - self.article_path = os.path.join(settings.DATASET_ROOT, f"{self.id}.html") + result = Document( + text, + min_text_length=50, + positive_keywords=",".join(settings.DATASET_POSITIVE_KEYWORDS), + negative_keywords=",".join(settings.DATASET_NEGATIVE_KEYWORDS), + ).summary() + except Exception: + result = text + except Unparseable: + result = text + except ( + KeyError, + requests.exceptions.RequestException, + requests.exceptions.Timeout, + requests.exceptions.TooManyRedirects, + ): + result = "" + self.article_path = os.path.join(settings.PAGES_ROOT, f"{self.id}.html") + if result: with open(self.article_path, "w") as fio: fio.write(result) self.save() return result + @property + def is_exists_text(self) -> bool: + existed_path = self.article_path is not None and self.article_path + if not existed_path: + return False + + if not os.path.exists(self.article_path): + return False + + with open(self.article_path) as fio: + return bool(fio.read()) + def get_data4cls(self, status=False): result = { "link": self.link, @@ -430,18 +512,25 @@ def check_cls(self, force=False): # print('Run check: {}'.format(self.pk)) prev_data = datetime.datetime.now() - datetime.timedelta(days=10) if force or self.last_check <= prev_data: - try: url = "{}/{}".format(settings.CLS_URL_BASE, "api/v1.0/classify/") - resp = requests.post( - url, data=json.dumps({"links": [self.item.data4cls]}) - ) - self.score = resp.json()["links"][0].get(self.item.link, False) + response = requests.post( + url, + json={ + "links": [self.item.data4cls], + }, + ).json() + + if "error" in response: + print(response["error"]) + return + else: + self.score = response["links"][0].get(self.item.link, False) except ( requests.exceptions.RequestException, requests.exceptions.Timeout, requests.exceptions.TooManyRedirects, - ) as e: + ): self.score = False # print('Real run check: {}'.format(self.pk)) self.save() @@ -503,6 +592,7 @@ class Meta: class Package(models.Model): + is_active = models.BooleanField(verbose_name=_("Is active"), default=True) name = models.CharField(verbose_name=_("Name"), max_length=255) description = models.TextField(verbose_name=_("Description"), blank=True) link = models.URLField(verbose_name=_("URL"), max_length=255) @@ -617,7 +707,5 @@ def run_remdow(instance, **kwargs): if "img" not in description: return - instance.description = remdow_lazy_img( - remdow_img_responsive(remdow_img_center(remdow_img_local(description))) - ) + instance.description = remdow_lazy_img(remdow_img_responsive(remdow_img_center(remdow_img_local(description)))) instance.save_without_signals() diff --git a/digest/pub_digest.py b/digest/pub_digest.py index 269cde8a..edfab486 100644 --- a/digest/pub_digest.py +++ b/digest/pub_digest.py @@ -1,4 +1,5 @@ import json +import logging import os import time from urllib.error import HTTPError @@ -7,19 +8,24 @@ import requests import tweepy import twx -from twx.botapi import TelegramBot - -# import vk +import vk from django.conf import settings from django.template.loader import render_to_string +from django.templatetags.static import static +from sentry_sdk import capture_exception +from twx.botapi import TelegramBot +from digest.management.commands import get_https_proxy from digest.pub_digest_email import send_email +logger = logging.getLogger(__name__) -def init_auth(consumer_key, consumer_secret, access_token, access_token_secret): + +def init_auth(consumer_key, consumer_secret, access_token, access_token_secret, use_proxy=True): auth = tweepy.OAuthHandler(consumer_key, consumer_secret) auth.set_access_token(access_token, access_token_secret) - api = tweepy.API(auth) + proxy = get_https_proxy() + api = tweepy.API(auth_handler=auth, proxy=proxy, timeout=15) return api @@ -40,13 +46,16 @@ def send_tweet_with_media(api, text, image): if "http://" not in image and "https://" not in image: assert os.path.isfile(image) file_path = image - else: - # качаем файл из сети - file_path = download_image(image) + if image == "https://pythondigest.ru/static/img/logo.png": + file_logo_path = static("img/logo.png") # -> /static/img/logo.png + file_path = os.path.abspath(f".{file_logo_path}") # to rel path + else: + # качаем файл из сети + file_path = download_image(image) assert file_path is not None, "Not found image (for twitter)" - api.update_with_media(file_path, text) + api.update_with_media(status=text, filename=file_path) class GitterAPI: @@ -100,7 +109,7 @@ def post_to_wall(api, owner_id, message, **kwargs): "from_group": 1, "owner_id": owner_id, "message": message, - "v": "5.73", + "v": "5.131", } data_dict.update(**kwargs) return api.wall.post(**data_dict) @@ -110,7 +119,7 @@ def send_message(api, user_id, message, **kwargs): data_dict = { "user_id": user_id, "message": message, - "v": "5.73", + "v": "5.131", } data_dict.update(**kwargs) return api.messages.send(**data_dict) @@ -134,7 +143,7 @@ def get_pydigest_groups() -> list: return [ (-96469126, 1), # https://vk.com/pynsk (-1540917, 0), # https://vk.com/python_developers - (-54001977, 0), # https://vk.com/pythonic_way + # (-54001977, 0), # https://vk.com/pythonic_way (-52104930, 0), # https://vk.com/club52104930 (-24847633, 1), # https://vk.com/club24847633 # (-69108280, 0), # https://vk.com/pirsipy @@ -145,7 +154,7 @@ def get_pydigest_groups() -> list: # (-38080744, 1), # https://vk.com/python_programing ] # return [ - # (-105509411, 1), # тестовая группа + # (-218211268, 1), # тестовая группа # ] @@ -158,8 +167,24 @@ def pub_to_gitter(text: str, token): time.sleep(1) -def pub_to_twitter(text, image_path, api): - send_tweet_with_media(api, text, image_path) +def pub_to_twitter(text, image_path, try_count=0): + if try_count == 5: + logger.info("Too many try for request") + return None + + try: + api = init_auth( + settings.TWITTER_CONSUMER_KEY, + settings.TWITTER_CONSUMER_SECRET, + settings.TWITTER_TOKEN, + settings.TWITTER_TOKEN_SECRET, + ) + send_tweet_with_media(api, text, image_path) + except Exception as e: + capture_exception(e) + get_https_proxy.invalidate() + logger.info(f"Exception error. Try refresh proxy. {e}") + return pub_to_twitter(text, image_path, try_count + 1) def pub_to_vk_users(text, api): @@ -208,13 +233,12 @@ def pub_to_slack(text, digest_url, digest_image_url, ifttt_key): def pub_to_email(title: str, news): - description = """ Оставляйте свои комментарии к выпуcкам, пишите нам в Slack (инвайт), - добавляйте свои новости через специальную форму. + добавляйте свои новости через специальную форму. Вы можете следить за нами с помощью - RSS, + RSS, Twitter или Telegram @py_digest{{ object.description|default:''|safe }}
diff --git a/digest/templates/digest/blocks/_issue_anounce.html b/digest/templates/digest/blocks/_issue_anounce.html index e63ed2f0..2b2d512c 100644 --- a/digest/templates/digest/blocks/_issue_anounce.html +++ b/digest/templates/digest/blocks/_issue_anounce.html @@ -5,16 +5,18 @@({{ object.date_from|date:"d.m.Y" }} - {{ object.date_to|date:"d.m.Y" }})
Тенденция недели
- {% endif %}
-
|
-