class Nicaragua(CountryTestBase):
    """Testing-data scraper for Nicaragua.

    Locates the weekly epidemiological bulletin link on the listing page at
    ``source_url_ref``, downloads the linked PDF, reads the cumulative
    ("Acumulado") figure from its text, and pairs it with a date derived
    from the bulletin's week number.
    """

    location: str = "Nicaragua"
    units: str = "tests performed"
    source_label: str = "Ministry of Health"
    # Prefix to which the download path fragment scraped from the bulletin page is appended.
    _base_url: str = "http://www.minsa.gob.ni/index.php/repository/func-download"
    # Listing page that is searched for the bulletin link.
    source_url_ref: str = "http://www.minsa.gob.ni/index.php/repository/Descargas-MINSA/COVID-19/Boletines-Epidemiol%C3%B3gico/Boletines-2022/"
    # Patterns used to recognise the bulletin link by its anchor text.
    regex: dict = {
        "title": r"Boletín Epidemiológico de la Semana No. ",
    }

    def read(self) -> pd.DataFrame:
        """Fetch the listing page and return the parsed one-row DataFrame."""
        soup = get_soup(self.source_url_ref)
        df = self._parse_data(soup)
        return df

    def _parse_data(self, soup: BeautifulSoup) -> pd.DataFrame:
        """Build a one-row DataFrame with "Date" and "Cumulative total" columns.

        Args:
            soup: Parsed listing page.

        Raises:
            ValueError: If the bulletin link, the download link or the count
                cannot be located.
        """
        # Get the download URL
        link = self._get_download_url(soup)
        # Parse count from pdf
        count = self._extract_text_from_url(link)
        # Get the date from week num
        date = self._parse_date(soup)
        df = pd.DataFrame(
            {
                "Date": [date],
                "Cumulative total": [count],
            }
        )
        return df

    def _find_bulletin_anchor(self, soup: BeautifulSoup):
        """Return the first ``<a>`` element whose text matches ``regex["title"]``.

        Raises:
            ValueError: If no such element exists. ``soup.find`` returns
                ``None`` in that case, which would otherwise surface later as
                an opaque ``TypeError``/``AttributeError``.
        """
        anchor = soup.find("a", text=re.compile(self.regex["title"]))
        if anchor is None:
            raise ValueError("Article not found, please update the script")
        return anchor

    def _get_download_url(self, soup: BeautifulSoup) -> str:
        """Resolve the PDF download URL for the bulletin linked from ``soup``.

        The bulletin page is requested and its body searched for the path
        fragment that follows ``func-download``; that fragment is appended to
        ``_base_url``.

        Args:
            soup: Parsed listing page.

        Returns:
            The download URL.

        Raises:
            ValueError: If the bulletin link (or its ``href``) is missing, or
                the download fragment is not present in the bulletin page.
        """
        # `.get` avoids a KeyError when the anchor carries no href attribute.
        source_url_ref = self._find_bulletin_anchor(soup).get("href")
        if not source_url_ref:
            raise ValueError("Article not found, please update the script")
        response = requests.get(source_url_ref, allow_redirects=True)
        text = response.content.decode("utf-8")
        match = re.search("func-download(.*)'}", text)
        if match is None:
            raise ValueError("Download link not found, please update the script")
        link = f"{self._base_url}{match.group(1)}"
        return link

    def _extract_text_from_url(self, link) -> str:
        """Download the PDF at ``link`` and extract the cumulative count.

        Args:
            link: URL of the bulletin PDF.

        Returns:
            The value produced by ``clean_count`` for the text found between
            "• Acumulado: " and " Recuperados".
            NOTE(review): the ``-> str`` annotation is kept from the original;
            confirm against ``clean_count``'s actual return type.

        Raises:
            ValueError: If the pattern is absent from the PDF text or the
                cleaned count is falsy.
        """
        with tempfile.NamedTemporaryFile() as tmp:
            download_file_from_url(link, tmp.name)
            with open(tmp.name, "rb") as f:
                # Newlines are flattened so the pattern can match across line breaks.
                text = extract_text(f).replace("\n", " ")
        match = re.search("• Acumulado: (.*) Recuperados", text)
        if match is None:
            raise ValueError("Count not found, please update the script")
        count = clean_count(match.group(1))
        if not count:
            raise ValueError("Count not found, please update the script")
        return count

    def _parse_date(self, soup: BeautifulSoup) -> Iterator:
        """Derive the report date from the week number in the bulletin title.

        The number following "Semana No. " in the link text is converted to
        an int and the end date of that ISO week of 2022 is passed through
        ``clean_date``.

        Args:
            soup: Parsed listing page.

        Returns:
            The result of ``clean_date`` for the week's end date.
            NOTE(review): the ``Iterator`` annotation is kept from the
            original; it looks wrong for a single date — confirm against
            ``clean_date``.

        Raises:
            ValueError: If the bulletin link is missing, its text does not
                contain a week number, or the captured text is not an integer.
        """
        title = self._find_bulletin_anchor(soup).text
        match = re.search("Boletín Epidemiológico de la Semana No. (.*)", title)
        if match is None:
            raise ValueError("Week number not found, please update the script")
        week_num = int(match.group(1))
        # NOTE(review): the year is hard-coded to 2022, matching the
        # "Boletines-2022" listing URL; both need updating together.
        date = Week(2022, week_num, system="iso").enddate()
        return clean_date(date)