class CapeVerde:
    """Scraper for the testing figures published on the Cape Verde bulletin site.

    The flow implemented by this class is:

    1. Download the listing page at ``source_url``.
    2. Take the first ``<h3 class="elementor-post__title">`` heading and follow
       the link inside it.
    3. Read the text of the linked page's ``page-content`` container.
    4. Extract a count and a date from that text with the patterns in ``regex``.
    5. Hand the result to ``increment`` (see :meth:`export`).
    """

    location = "Cape Verde"
    units = "tests performed"
    source_label = "Government of Cape Verde"
    source_url = "https://covid19.cv/category/boletim-epidemiologico/"
    # Patterns applied to the bulletin text:
    #   "date":  day, month name and 20xx year, e.g. "3 de maio de 2021".
    #   "count": the number between "total/totais de|dos|das" and
    #            "resultados/amostras".
    regex = {
        "date": r"(\d+) (?:de )?(\w+) de (20\d+)",
        "count": r"(?:total|totais) (?:de|dos|das) (\d+) (?:resultados|amostras)",
    }

    def read(self) -> pd.Series:
        """Read data from source.

        Returns:
            A ``pd.Series`` built from the record returned by
            :meth:`_parse_data` (keys ``source_url``, ``date``, ``daily_change``).
        """
        soup = get_soup(self.source_url)
        data = self._parse_data(soup)
        return pd.Series(data)

    def _parse_data(self, soup: "BeautifulSoup") -> dict:
        """Get data from the source page.

        Args:
            soup: Parsed listing page (the page at ``source_url``).

        Returns:
            A dict with keys ``source_url`` (the bulletin link that was
            followed), ``date`` and ``daily_change``.
        """
        # Get relevant element
        elem = self._get_relevant_element(soup)
        # Extract url from element
        url = self._parse_link_from_element(elem)
        # Extract text from url
        text = self._get_text_from_url(url)
        # Extract metrics from text
        daily_change = self._parse_metrics(text)
        # Extract date from text
        date = self._parse_date(text)
        record = {
            "source_url": url,
            "date": date,
            "daily_change": daily_change,
        }
        return record

    def _get_relevant_element(self, soup: "BeautifulSoup") -> "element.Tag":
        """Get the relevant element from the source page.

        Args:
            soup: Parsed listing page.

        Returns:
            The first ``<h3>`` with class ``elementor-post__title``.

        Raises:
            TypeError: If no such heading is found.
        """
        elem = soup.find("h3", class_="elementor-post__title")
        if not elem:
            raise TypeError("Website Structure Changed, please update the script")
        return elem

    def _parse_date(self, text: str) -> str:
        """Get the date from the bulletin text.

        The text is lowercased before it is handed to ``extract_clean_date``
        together with the ``"date"`` pattern, the ``"%d %B %Y"`` format and
        ``lang="pt"``.

        Args:
            text: Bulletin text as returned by :meth:`_get_text_from_url`.

        Returns:
            Whatever ``extract_clean_date`` returns for the match
            (annotated as ``str``).
        """
        return extract_clean_date(text.lower(), self.regex["date"], "%d %B %Y", lang="pt")

    def _parse_link_from_element(self, elem: "element.Tag") -> str:
        """Get link from relevant element.

        Args:
            elem: Heading element returned by :meth:`_get_relevant_element`.

        Returns:
            The ``href`` attribute of the first ``<a>`` inside ``elem``.

        Raises:
            TypeError: If ``elem`` contains no ``<a>`` tag.
        """
        # `find` is the supported spelling of the legacy `findChild` alias.
        anchor = elem.find("a")
        if anchor is None:
            # Same exception type the bare `None["href"]` lookup used to raise,
            # but with the message used elsewhere in this class.
            raise TypeError("Website Structure Changed, please update the script")
        return anchor["href"]

    def _get_text_from_url(self, url: str) -> str:
        """Extract text from the url.

        Args:
            url: Address of the bulletin page.

        Returns:
            The text of the element with class ``page-content``, with
            surrounding whitespace stripped from each text fragment and all
            commas removed.

        Raises:
            AttributeError: If the page has no ``page-content`` element
                (``find`` returns ``None`` in that case).
        """
        soup = get_soup(url)
        text = soup.find(class_="page-content").get_text(strip=True).replace(",", "")
        return text

    def _parse_metrics(self, text: str) -> int:
        """Get metrics from the text.

        Args:
            text: Bulletin text as returned by :meth:`_get_text_from_url`.

        Returns:
            The first number matched by the ``"count"`` pattern, passed
            through ``clean_count``.

        Raises:
            ValueError: If the ``"count"`` pattern does not match.
        """
        # The pattern is written in lowercase; match case-insensitively so a
        # capitalised "Total ..." is found too (the date parser lowercases
        # its input for the same reason).
        match = re.search(self.regex["count"], text, flags=re.IGNORECASE)
        if not match:
            raise ValueError("Website Structure Changed, please update the script")
        return clean_count(match.group(1))

    def export(self):
        """Export data to CSV.

        Reads the latest record with :meth:`read` and forwards it, together
        with the class-level metadata, to ``increment``.
        """
        data = self.read()
        increment(
            sheet_name=self.location,
            country=self.location,
            units=self.units,
            date=data["date"],
            source_url=data["source_url"],
            source_label=self.source_label,
            daily_change=data["daily_change"],
        )