class Ukraine:
    """Scraper for the cumulative COVID-19 test count published at ``source_url``.

    The page is fetched with ``get_soup``, the count is read from the element
    that follows the "total of tests" label, and the date is read from the
    "Information as of <Month> <day>" text.
    """

    location = "Ukraine"
    units = "tests performed"
    source_label = "Cabinet of Ministers of Ukraine"
    notes = ""
    source_url = "https://covid19.gov.ua/en"
    # Patterns used to locate the relevant text nodes in the page.
    regex = {
        "count": r"total of tests",
        "date": r"Information as of (\w+) (\d{1,2})",
    }

    def read(self) -> pd.Series:
        """Read data from source.

        Returns:
            pd.Series: record with the keys ``source_url``, ``date`` and ``count``.
        """
        soup = get_soup(self.source_url)
        data = self._parse_data(soup)
        return pd.Series(data)

    def _parse_data(self, soup: BeautifulSoup) -> dict:
        """Get data from the source page.

        Args:
            soup: parsed source page.

        Returns:
            dict: ``{"source_url": ..., "date": ..., "count": ...}``.

        Raises:
            TypeError: if the expected elements cannot be found in the page.
        """
        # Get relevant element
        elem = self._get_relevant_element(soup)
        # Extract date from soup
        date_ = self._parse_date(soup)
        # Parse metrics from element
        count = self._parse_metrics(elem)
        record = {
            "source_url": self.source_url,
            "date": date_,
            "count": count,
        }
        return record

    def _get_relevant_element(self, soup: BeautifulSoup) -> element.Tag:
        """Get the element holding the test count.

        The count lives in the sibling with class ``field-value`` that follows
        the parent of the text node matching ``regex["count"]``.

        Raises:
            TypeError: if the label or the value element is missing.
        """
        label = soup.find(text=re.compile(self.regex["count"]))
        elem = None
        # Guard the label lookup: dereferencing `.parent` on a missing label
        # would raise AttributeError before the structure check below runs.
        if label is not None and label.parent is not None:
            elem = label.parent.find_next_sibling(class_="field-value")
        if not elem:
            raise TypeError("Website Structure Changed, please update the script")
        return elem

    def _parse_metrics(self, elem: element.Tag) -> int:
        """Get the count from the element.

        Surrounding whitespace and interior spaces are removed from the element
        text before it is handed to ``clean_count``.
        """
        count = elem.text.strip().replace(" ", "")
        return clean_count(count)

    def _parse_date(self, soup: BeautifulSoup) -> str:
        """Get the date from the source page.

        The page only states month and day, so the current year is used.

        Raises:
            TypeError: if the date text cannot be found in the page.
        """
        # NOTE(review): using today's year looks wrong around New Year (page says
        # "December 31" while today is already January) - confirm and handle upstream.
        year = date.today().year
        text = soup.find(text=re.compile(self.regex["date"]))
        match = re.search(self.regex["date"], text) if text is not None else None
        if match is None:
            raise TypeError("Website Structure Changed, please update the script")
        month, day = match.group(1, 2)
        # The fields must be space-separated to agree with the "%Y %B %d" format.
        return clean_date(f"{year} {month} {day}", "%Y %B %d")

    def export(self):
        """Export data to csv.

        Reads the latest record and passes it, together with the class
        metadata, to ``increment``.
        """
        data = self.read()
        increment(
            sheet_name=self.location,
            country=self.location,
            units=self.units,
            date=data["date"],
            source_url=data["source_url"],
            source_label=self.source_label,
            count=data["count"],
        )