class Haiti(CountryTestBase):
    """Testing-data source for Haiti.

    The figures are read from a PDF report that is linked from the
    documentation page of the Ministry of Public Health and Population
    (``source_url``). The link to the PDF is located through the report
    title (``regex["title"]``) and the count is extracted from the PDF
    text with ``regex["metrics"]``.
    """

    location: str = "Haiti"
    units: str = "tests performed"
    # Listing page that links to the PDF reports (a plain URL string).
    source_url: str = "https://www.mspp.gouv.ht/documentation/"
    # URL of the PDF report; set by `_parse_data` once the link has been found.
    source_url_ref: str = None
    source_label: str = "Ministry of Public Health and Population"
    regex: dict = {
        # Title text that identifies the report entry on the listing page.
        "title": r"surveillance du nouveau Coronavirus \(COVID-19\)",
        # Day-month-year date with a 20xx year, as it appears in the report link.
        "date": r"(\d{1,2}\-\d{1,2}\-20\d{2})",
        # Digits/commas that follow the literal word "INDICATEURS" in the PDF text.
        "metrics": r"INDICATEURS ([\d,]+)",
    }

    def read(self) -> pd.DataFrame:
        """Reads data from source.

        Returns:
            The dataframe produced by `_parse_data` for the page at
            ``source_url``.
        """
        soup = get_soup(self.source_url)
        return self._parse_data(soup)

    def _parse_data(self, soup: BeautifulSoup) -> pd.DataFrame:
        """Parses data from soup.

        Locates the report title in the page, takes the first ``<a>`` element
        that follows the title's parent, stores its ``href`` in
        ``source_url_ref`` and parses the metrics out of the linked PDF.

        Args:
            soup: Parsed listing page.

        Returns:
            Single-row dataframe built by `_parse_metrics`.

        Raises:
            ValueError: If the title or the PDF link cannot be found.
        """
        # Obtain pdf url. The shared title pattern is reused so the page lookup
        # cannot drift from `regex["title"]`.
        title_node = soup.find(text=re.compile(self.regex["title"]))
        if title_node is None:
            raise ValueError("Unable to find the report title in the source page, please update the regex.")
        link = title_node.parent.find_next("a")
        href = link.get("href") if link is not None else None
        if not href:
            raise ValueError("Unable to find the pdf link in the source page, please check the page layout.")
        self.source_url_ref = href
        # Extract text from pdf url
        text = self._extract_text_from_url()
        # Clean data
        return self._parse_metrics(text)

    def _extract_text_from_url(self) -> str:
        """Extracts text from pdf.

        Downloads the file at ``source_url_ref`` to a temporary location,
        extracts its text and collapses every run of whitespace (including
        newlines) into a single space.

        Returns:
            The whitespace-normalised text of the PDF.
        """
        import os

        # A temporary directory is used instead of an open NamedTemporaryFile:
        # re-opening a still-open named temporary file by name is not portable
        # (it fails on Windows).
        with tempfile.TemporaryDirectory() as tmp_dir:
            pdf_path = os.path.join(tmp_dir, "report.pdf")
            download_file_from_url(self.source_url_ref, pdf_path)
            with open(pdf_path, "rb") as f:
                text = extract_text(f)
        # `\s+` also matches newlines, so no separate newline replacement is needed.
        return re.sub(r"\s+", " ", text)

    def _parse_metrics(self, text: str) -> pd.DataFrame:
        """Parses metrics from data.

        Args:
            text: Whitespace-normalised text of the PDF report.

        Returns:
            Single-row dataframe with a ``Cumulative total`` column.

        Raises:
            ValueError: If ``regex["metrics"]`` does not match ``text``.
        """
        # Extract data
        match_count = re.search(self.regex["metrics"], text)
        if not match_count:
            raise ValueError("Unable to extract data from text, please update the regex.")
        # NOTE(review): clean_count presumably converts "1,234"-style strings to int - confirm.
        count = clean_count(match_count.group(1))
        # Create dataframe
        return pd.DataFrame({"Cumulative total": [count]})

    def _parse_date(self, link: str) -> str:
        """Gets date from link.

        Args:
            link: Text expected to contain a date matching ``regex["date"]``.

        Returns:
            The value returned by ``extract_clean_date`` for the ``%d-%m-%Y``
            input format.
        """
        return extract_clean_date(link, self.regex["date"], "%d-%m-%Y")