class Nepal(CountryTestBase):
    """Testing-data scraper for Nepal.

    Asks the ministry API for its list of releases, downloads the first listed
    English situation report (a PDF), extracts its text and pulls the test and
    positive counts out of it with a regular expression.
    """

    location: str = "Nepal"
    units: str = "samples tested"
    # "api" returns the JSON listing of releases; "base" is the prefix that the
    # file name found in that listing is appended to.
    source_url: dict = {
        "api": "https://covid19.mohp.gov.np/covid/api/ministryrelease",
        "base": "https://covid19.mohp.gov.np/covid/englishSituationReport/",
    }
    # Filled in by `_parse_data` with the full URL of the report being parsed.
    source_url_ref: str = None
    source_label: str = "Ministry of Health and Population"
    # "date" matches a d-m-20yy date; "metrics" captures four integers, consumed
    # as two pairs by `_parse_metrics`.
    regex: dict = {
        "date": r"(\d{1,2}\-\d{1,2}\-20\d{2})",
        "metrics": r"PCR \| Antigen (\d+) (\d+) PCR \| Antigen (\d+) (\d+)",
    }

    def read(self) -> pd.DataFrame:
        """Reads data from source.

        Returns:
            Single-row DataFrame produced by `_parse_data` from the API listing.
        """
        links = request_json(self.source_url["api"])
        return self._parse_data(links)

    def _parse_data(self, links: dict) -> pd.DataFrame:
        """Parses data from the API listing.

        Args:
            links: Decoded API response. Only ``links["data"][0]["english_file"]``
                is read.

        Returns:
            DataFrame built by `_parse_metrics` from the report text.

        Side effects:
            Sets ``self.source_url_ref`` to the report URL.
        """
        # Obtain pdf url
        # NOTE(review): presumably the first entry is the most recent release;
        # confirm against the API.
        href = links["data"][0]["english_file"]
        self.source_url_ref = "{}{}".format(self.source_url["base"], href)
        # Extract text from pdf url
        text = self._extract_text_from_url()
        # Clean data
        return self._parse_metrics(text)

    def _extract_text_from_url(self) -> str:
        """Extracts text from the pdf at ``self.source_url_ref``.

        Returns:
            The extracted text with every run of whitespace collapsed to a
            single space.
        """
        with tempfile.NamedTemporaryFile() as tmp:
            download_file_from_url(self.source_url_ref, tmp.name)
            # NOTE: re-opening a NamedTemporaryFile by name while it is still
            # open works on Unix but not on Windows (see the tempfile docs).
            with open(tmp.name, "rb") as f:
                text = extract_text(f)
        # `\s+` already covers newlines, so no separate "\n" replacement is
        # needed before collapsing whitespace.
        return re.sub(r"\s+", " ", text)

    def _parse_metrics(self, text: str) -> pd.DataFrame:
        """Parses metrics from the report text.

        Args:
            text: Whitespace-normalised report text.

        Returns:
            Single-row DataFrame with columns "Cumulative total" (sum of the
            first two captured numbers) and "positive" (sum of the last two).

        Raises:
            ValueError: If ``self.regex["metrics"]`` does not match ``text``.
        """
        # Extract data
        match_count = re.search(self.regex["metrics"], text)
        if not match_count:
            raise ValueError("Unable to extract data from text, please update the regex.")
        # Each metric appears as a pair of numbers (presumably PCR and antigen,
        # per the regex labels - confirm against a report); the pair is summed.
        tests = clean_count(match_count.group(1)) + clean_count(match_count.group(2))
        positive = clean_count(match_count.group(3)) + clean_count(match_count.group(4))
        # Create dataframe
        df = {
            "Cumulative total": [tests],
            "positive": [positive],
        }
        return pd.DataFrame(df)

    def _parse_date(self, link: str) -> str:
        """Get date from link.

        Delegates to `extract_clean_date` with ``self.regex["date"]`` and the
        input format "%d-%m-%Y". Not called by the other methods shown in this
        class.
        """
        return extract_clean_date(link, self.regex["date"], "%d-%m-%Y")

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Pipeline for data.

        Chains `pipe_date`, `pipe_metadata`, `pipe_merge_current` and `pipe_pr`,
        in that order. Those steps are not defined in the code shown here.
        """
        return (
            df.pipe(self.pipe_date)
            .pipe(self.pipe_metadata)
            .pipe(self.pipe_merge_current)
            .pipe(self.pipe_pr)
        )

    def export(self):
        """Exports data to CSV."""
        df = self.read().pipe(self.pipeline)
        # Export to CSV
        self.export_datafile(df, extra_cols=["positive"], float_format="%.3f")