class SaintVincentAndTheGrenadines(CountryTestBase):
    """Testing-data scraper for Saint Vincent and the Grenadines.

    Workflow (see ``_parse_data``): find the report link on the source page,
    follow it, find the PDF link on that page, download the PDF, extract its
    text and parse the test counts and the report date out of that text.
    """

    location: str = "Saint Vincent and the Grenadines"
    units: str = "tests performed"
    source_url: str = "http://health.gov.vc/health/index.php/c"
    # Filled in by `_parse_data` with the URL of the PDF the figures come from.
    source_url_ref: str = None
    source_label: str = "Ministry of Health, Wellness and the Environment"
    # Patterns used to locate links on the web pages and values in the PDF text.
    regex: dict = {
        # Text of the anchor pointing at the report page.
        "title": r"COVID-19 Report",
        # Text of the anchor pointing at the PDF.
        "pdf": r"Please click for full details",
        # Matched against lower-cased text, e.g. "<word> <day> 20YY".
        "date": r"(\w+ \d{1,2} 20\d{2})",
        "pcr": r"Total PCR Tests done ([\d,]+)",
        # Captures the second number that follows the parenthesised label.
        "ag": r"today Total Rapid Ag \(.*?\) [\d,]+ ([\d,]+)",
    }

    def read(self) -> pd.DataFrame:
        """Read data from source.

        Returns:
            One-row DataFrame with columns ``Cumulative total`` and ``Date``.
        """
        soup = get_soup(self.source_url)
        return self._parse_data(soup)

    def _parse_data(self, soup: BeautifulSoup) -> pd.DataFrame:
        """Parse data from soup.

        Args:
            soup: Parsed HTML of ``source_url``.

        Returns:
            One-row DataFrame with columns ``Cumulative total`` and ``Date``.

        Side effects:
            Sets ``self.source_url_ref`` to the PDF URL.
        """
        # Obtain the relevant link
        link = self._parse_link(soup, self.regex["title"])
        # Get soup from link
        soup = get_soup(link)
        # Extract pdf link from soup
        self.source_url_ref = self._parse_link(soup, self.regex["pdf"])
        # Extract text from pdf url
        text = self._extract_text_from_pdf()
        # Parse metrics
        count = self._parse_metrics(text)
        # Parse date
        date = self._parse_date(text)
        # Create dataframe
        df = {
            "Cumulative total": [count],
            "Date": [date],
        }
        return pd.DataFrame(df)

    def _parse_link(self, soup: BeautifulSoup, regex: str) -> str:
        """Parse link from soup.

        Args:
            soup: Parsed HTML to search.
            regex: Pattern the anchor text must match.

        Returns:
            The base URL of ``source_url`` concatenated with the anchor's href.

        Raises:
            ValueError: If no matching anchor exists, or it has no usable href.
        """
        anchor = soup.find("a", text=re.compile(regex))
        # `find` returns None when nothing matches, and a tag may lack `href`;
        # both cases must end in the ValueError below rather than a
        # TypeError/KeyError from subscripting.
        href = anchor.get("href") if anchor is not None else None
        if not href:
            raise ValueError("Unable to find link, please update the regex.")
        base_url = get_base_url(self.source_url, "http")
        return f"{base_url}{href}"

    def _extract_text_from_pdf(self) -> str:
        """Extract text from pdf.

        Downloads ``self.source_url_ref`` into a temporary file and returns
        its extracted text with newlines replaced by spaces.
        """
        with tempfile.NamedTemporaryFile() as tmp:
            download_file_from_url(self.source_url_ref, tmp.name)
            with open(tmp.name, "rb") as f:
                text = extract_text(f).replace("\n", " ")
        return text

    def _parse_metrics(self, text: str) -> int:
        """Parse metrics from data.

        Args:
            text: Text extracted from the PDF.

        Returns:
            Sum of the cleaned PCR and rapid-antigen counts (presumably
            integers; depends on what ``clean_count`` returns).

        Raises:
            ValueError: If either the PCR or the antigen pattern is not found.
        """
        pcr = re.search(self.regex["pcr"], text)
        ag = re.search(self.regex["ag"], text)
        # Both matches are dereferenced below, so either one missing is fatal.
        if not pcr or not ag:
            raise ValueError("Unable to extract data from text, please update the regex.")
        pcr = clean_count(pcr.group(1))
        ag = clean_count(ag.group(1))
        return pcr + ag

    def _parse_date(self, text: str) -> str:
        """Get date from text.

        The text is lower-cased before being matched against ``regex["date"]``
        and parsed with the ``"%b %d %Y"`` format.
        """
        return extract_clean_date(text.lower(), self.regex["date"], "%b %d %Y")

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Pipeline for data: applies ``self.pipe_metadata`` to ``df``."""
        return df.pipe(self.pipe_metadata)

    def export(self):
        """Export data to CSV."""
        df = self.read().pipe(self.pipeline)
        # Export to CSV
        self.export_datafile(df, attach=True)