class Albania:
    """Scraper for the COVID-19 test counts published in Albanian ministry news posts.

    The class attributes hold the metadata attached to the extracted figures
    (``location``, ``units``, ``source_label``, ``notes``), the site layout
    (``_base_url``, ``_url_subdirectory``, ``_num_max_pages``; not referenced by
    the methods defined here) and the regular expressions used to recognise the
    relevant news item (``regex["title"]``), its publication date
    (``regex["date"]``) and the number of tests (``regex["count"]``).
    """

    location = "Albania"
    units = "tests performed"
    source_label = "Ministry of Health and Social Protection"
    notes = ""
    _base_url = "https://shendetesia.gov.al"
    _url_subdirectory = "category/lajme/page"
    _num_max_pages = 3
    regex = {
        "title": r"COVID19/ Ministria e Shëndetësisë:",
        "date": r"(\d+\/\d+\/\d+)",
        "count": r"(\d+) testime nga të cilat kanë rezultuar pozitivë me COVID19",
    }

    def _parse_data(self, soup: BeautifulSoup) -> tuple:
        """Get data from the source page.

        Args:
            soup: Parsed news-listing page.

        Returns:
            A ``(record, flag)`` pair. When a matching news item with a link is
            found, ``record`` is a dict with the keys ``source_url``, ``date``
            and ``daily_change`` and ``flag`` is ``False``. Otherwise the pair
            is ``(None, True)``.
            NOTE(review): the flag presumably tells the caller whether to keep
            looking on further pages -- confirm against the caller.

        Raises:
            ValueError: If the linked article or its date cannot be parsed.
        """
        # Get relevant element
        elem = self._get_relevant_element(soup)
        if elem is None:
            return None, True
        # Extract url and date from element
        url, date = self._get_link_and_date_from_element(elem)
        if not url:
            # A headline without a usable link is handled like a page without
            # a headline, instead of failing while unpacking/fetching.
            return None, True
        # Extract text from url
        text = self._get_text_from_url(url)
        daily_change = self._parse_metrics(text)
        record = {
            "source_url": url,
            "date": date,
            "daily_change": daily_change,
        }
        return record, False

    def _get_relevant_element(self, soup: BeautifulSoup) -> element.Tag:
        """Get the relevant element in news feed.

        Args:
            soup: Parsed news-listing page.

        Returns:
            The first ``<h2>`` element whose text matches ``regex["title"]``,
            or ``None`` when no heading matches.
        """
        title_pattern = self.regex["title"]
        for heading in soup.find_all("h2"):
            if re.search(title_pattern, heading.text):
                return heading
        return None

    def _get_text_from_url(self, url: str) -> str:
        """Extract text from the url.

        Args:
            url: Address of the news article.

        Returns:
            The whitespace-stripped text of the ``div.pageDescription``
            element, with every comma removed.

        Raises:
            ValueError: If the page has no ``div.pageDescription`` element.
        """
        # NOTE(review): verify=False is forwarded to get_soup; presumably it
        # disables TLS certificate verification -- confirm it is still needed.
        soup = get_soup(url, verify=False)
        container = soup.find("div", class_="pageDescription")
        if container is None:
            raise ValueError(f"No 'pageDescription' element found at {url}")
        # Commas are dropped so that the digits-only ``regex["count"]`` pattern
        # can match comma-grouped numbers.
        return container.get_text(strip=True).replace(",", "")

    def _get_link_and_date_from_element(self, elem: element.Tag) -> tuple:
        """Extract link and date from relevant element.

        Args:
            elem: Heading element of the relevant news item.

        Returns:
            A ``(link, date)`` pair, or ``(None, None)`` when the element has
            no link. A pair is always returned so callers can unpack it safely.

        Raises:
            ValueError: If a link is present but the date cannot be located.
        """
        link = self._parse_link_from_element(elem)
        if not link:
            return None, None
        date = self._parse_date_from_element(elem)
        return link, date

    def _parse_date_from_element(self, elem: element.Tag) -> str:
        """Get date from relevant element.

        Args:
            elem: Heading element of the relevant news item.

        Returns:
            The result of ``clean_date`` applied to the first ``regex["date"]``
            match in the sibling date element, read as ``%d/%m/%Y``.

        Raises:
            ValueError: If the sibling date element is missing or holds no date.
        """
        # "dateDetalils" is the class name looked up on the page; keep verbatim.
        date_tag = elem.find_next_sibling("div", class_="dateDetalils")
        if date_tag is None:
            raise ValueError("No 'dateDetalils' element follows the news heading")
        match = re.search(self.regex["date"], date_tag.text)
        if match is None:
            raise ValueError(f"No date found in {date_tag.text!r}")
        return clean_date(match.group(), "%d/%m/%Y")

    def _parse_link_from_element(self, elem: element.Tag) -> str:
        """Get link from relevant element.

        Args:
            elem: Heading element of the relevant news item.

        Returns:
            The ``href`` of the first ``<a>`` inside ``elem`` as a string, or
            ``None`` when there is no ``<a>`` or it carries no ``href``.
        """
        anchor = elem.find("a")
        if anchor is None:
            return None
        href = anchor.get("href")
        if not href:
            return None
        return f"{href}"

    def _parse_metrics(self, text: str) -> int:
        """Get metrics from news text.

        Args:
            text: Article text (see ``_get_text_from_url``).

        Returns:
            The number captured by ``regex["count"]``, passed through
            ``clean_count``.

        Raises:
            ValueError: If ``text`` does not contain the expected sentence.
        """
        match = re.search(self.regex["count"], text)
        if match is None:
            raise ValueError("Number of tests not found in the news text")
        count = int(match.group(1))
        return clean_count(count)