class Pakistan:
    """Scraper for the "Total Tests" figure published at ``source_url``.

    ``read`` fetches the page and returns the parsed record; ``export`` passes
    that record on to ``increment``. If any expected page element is missing,
    a ``TypeError`` is raised so that a layout change on the website is
    reported explicitly rather than as an ``AttributeError`` on ``None``.
    """

    location = "Pakistan"
    units = "tests performed"
    source_label = "Government of Pakistan"
    notes = ""
    source_url = "http://www.covid.gov.pk/"
    # "header" and "count" are passed to ``soup.find(text=...)`` as plain
    # strings (not compiled patterns); only "date" is handed to
    # ``extract_clean_date`` as a pattern.
    # NOTE(review): bs4 matches a plain string exactly, so the trailing space
    # in "header" looks significant -- confirm against the live page.
    regex = {
        "header": "Pakistan statistics ",
        "count": r"Total Tests",
        "date": r"(\d+ \w+, \d+)",
    }
    # Single error message used whenever an expected page element is missing.
    _structure_changed_msg = "Website Structure Changed, please update the script"

    def read(self) -> pd.Series:
        """Read data from source.

        Returns:
            A Series with the keys ``source_url``, ``date`` and ``count``.

        Raises:
            TypeError: If the page no longer contains the expected elements.
        """
        soup = get_soup(self.source_url)
        data = self._parse_data(soup)
        return pd.Series(data)

    def _parse_data(self, soup: BeautifulSoup) -> dict:
        """Get data from the source page.

        Args:
            soup: Parsed page to extract the record from.

        Returns:
            A dict with the keys ``source_url``, ``date`` and ``count``.

        Raises:
            TypeError: If the count element or the date element is missing.
        """
        # Get relevant element
        elem = self._get_relevant_element(soup)
        if elem is None:
            raise TypeError(self._structure_changed_msg)
        # Extract metrics from relevant element
        count = self._parse_metrics(elem)
        # Extract date from soup
        date = self._parse_date_from_soup(soup)
        record = {
            "source_url": self.source_url,
            "date": date,
            "count": count,
        }
        return record

    def _get_relevant_element(self, soup: BeautifulSoup) -> element.Tag:
        """Get the relevant element from soup.

        Locates the text node equal to ``regex["count"]`` and returns its
        grandparent element.

        Returns:
            The grandparent element, or ``None`` when the label (or its
            parent) is absent, so the caller can report the layout change.
        """
        label = soup.find(text=self.regex["count"])
        # ``find`` yields None when nothing matches; chaining ``.parent`` on it
        # would raise AttributeError before the caller's check could run.
        if label is None or label.parent is None:
            return None
        return label.parent.parent

    def _parse_metrics(self, elem: element.Tag) -> int:
        """Get metrics from element.

        Args:
            elem: Element whose first ``span`` holds the count text.

        Returns:
            The result of ``clean_count`` applied to the span's text.

        Raises:
            TypeError: If ``elem`` has no ``span`` child.
        """
        span = elem.span
        if span is None:
            raise TypeError(self._structure_changed_msg)
        return clean_count(span.text)

    def _parse_date_from_soup(self, soup: BeautifulSoup) -> str:
        """Get date from soup.

        Finds the text node equal to ``regex["header"]``, then the descendant
        of its parent with ``id="last-update"``, and extracts the date from
        that element's text using ``regex["date"]`` and the ``"%d %b, %Y"``
        format.

        Raises:
            TypeError: If the header or the ``last-update`` element is missing.
        """
        header = soup.find(text=self.regex["header"])
        if header is None or header.parent is None:
            raise TypeError(self._structure_changed_msg)
        date_elem = header.parent.findChild(id="last-update")
        if date_elem is None:
            raise TypeError(self._structure_changed_msg)
        return extract_clean_date(date_elem.text, self.regex["date"], "%d %b, %Y")

    def export(self):
        """Read the latest record and pass it to ``increment``.

        NOTE(review): ``increment`` is defined outside this block; presumably
        it persists the record (the original docstring said "Export data to
        csv") -- confirm against its implementation.
        """
        data = self.read()
        increment(
            sheet_name=self.location,
            country=self.location,
            units=self.units,
            date=data["date"],
            source_url=data["source_url"],
            source_label=self.source_label,
            count=data["count"],
        )