class Fiji(CountryVaxBase):
    """Extract the latest booster-dose count for Fiji from "COVID-19 Update" posts.

    The listing pages ``source_url + "<n>/"`` are scanned for ``<h2>`` headings
    whose text contains the ``regex["title"]`` string. Each matching heading
    links to an article; the article text is searched for a reporting date and
    a booster count.
    """

    location: str = "Fiji"
    # Base URL of the paginated listing; the page number and "/" are appended.
    source_url: str = "https://www.health.gov.fj/page/"
    # URL of the article most recently inspected by `_parse_data`.
    source_url_ref: str = ""
    # Number of listing pages to scan before giving up.
    _num_max_pages: int = 3
    # Number of headings to try on each listing page.
    _num_rows_per_page: int = 3
    # Queue of candidate headings for the current listing page (None = not
    # loaded yet). The double underscore triggers name mangling, so the
    # attribute is stored as `_Fiji__element`.
    __element = None
    regex = {
        "title": r"COVID-19 Update",
        "year": r"\d{4}",
        "date": r"tests have been reported for (\w+ \d+)",
        "booster": r"(\d+) individuals have so far received booster doses.",
    }

    def read(self) -> pd.DataFrame:
        """Read data from source.

        Scans listing pages 1 to ``_num_max_pages`` and, on each page, tries up
        to ``_num_rows_per_page`` candidate headings in order.

        Returns:
            A DataFrame built from the first record that could be parsed
            (columns ``date`` and ``total_boosters``), or ``None`` when no
            candidate on any scanned page yields a record.
        """
        for page in range(1, self._num_max_pages + 1):
            soup = get_soup(f"{self.source_url}{page}/")
            # Build the candidate queue once per page. `_parse_data` then
            # consumes one heading per call, so each retry below moves on to
            # the next heading instead of re-reading the first one.
            self._get_list_of_elements(soup)
            for _ in range(self._num_rows_per_page):
                data, proceed = self._parse_data(soup)
                if not proceed:
                    return pd.DataFrame(data)
        return None

    def _parse_data(self, soup: BeautifulSoup) -> tuple:
        """Get data from the source page.

        Consumes the next candidate heading from the queue and tries to turn
        the linked article into a record.

        Args:
            soup: Parsed listing page. It is only used to build the candidate
                queue when that queue has not been loaded yet.

        Returns:
            ``(record, False)`` when a record was extracted, where ``record``
            is ``{"date": [date], "total_boosters": [booster]}``;
            ``(None, True)`` when the queue is empty or the article contains
            no reporting date, meaning the caller should keep looking.

        Raises:
            TypeError: Propagated from `_parse_metrics` when the article has a
                date but no booster figure.
        """
        # Rebuilding the queue on every call would reset it and hand back the
        # same first heading each time, so only build it when nothing has been
        # loaded so far.
        if self.__element is None:
            self._get_list_of_elements(soup)
        if not self.__element:
            return None, True
        # Take the next heading together with the year found in its title.
        elem, year = self._get_relevant_element_and_year()
        # Remember which article this record comes from.
        self.source_url_ref = self._parse_link_from_element(elem)
        text = self._get_text_from_url(self.source_url_ref)
        date = self._parse_date_from_text(year, text)
        if not date:
            return None, True
        booster = self._parse_metrics(text)
        record = {
            "date": [date],
            "total_boosters": [booster],
        }
        return record, False

    def _get_list_of_elements(self, soup: BeautifulSoup) -> None:
        """Get the relevant elements list from the source page.

        Stores every ``<h2>`` element whose text contains the
        ``regex["title"]`` string (plain substring test) as the candidate
        queue, replacing any previous queue.
        """
        elem_list = soup.find_all("h2")
        self.__element = [title for title in elem_list if self.regex["title"] in title.text]

    def _get_relevant_element_and_year(self) -> tuple:
        """Get the relevant element and year from the element list.

        Removes the first heading from the candidate queue.

        Returns:
            ``(elem, year)`` where ``year`` is the first ``regex["year"]``
            match in the heading text.

        Raises:
            AttributeError: If the heading text contains no such match.
        """
        elem = self.__element.pop(0)
        year = re.search(self.regex["year"], elem.text).group()
        return elem, year

    def _parse_date_from_text(self, year: str, text: str) -> str:
        """Get date from relevant element.

        Args:
            year: Year string taken from the heading.
            text: Article text as produced by `_get_text_from_url`.

        Returns:
            The result of ``clean_date`` for "<month> <day> <year>", or
            ``None`` when ``regex["date"]`` does not match the text.
        """
        match = re.search(self.regex["date"], text)
        if not match:
            return None
        month_day = match.group(1)
        # The "%B %d %Y" format needs whitespace between the day and the year,
        # so the two parts must be joined with a space.
        return clean_date(f"{month_day} {year}", "%B %d %Y")

    def _parse_link_from_element(self, elem: element.Tag) -> str:
        """Get link from relevant element.

        Returns the ``href`` of the first ``<a>`` found inside ``elem``.
        """
        link = elem.find("a")["href"]
        return link

    def _get_text_from_url(self, url: str) -> str:
        """Extract text from the url.

        The page text is flattened to one lower-case line: newlines become
        spaces, non-breaking spaces are removed, and commas between two digits
        are dropped so that "12,345" reads as "12345".
        """
        soup = get_soup(url)
        text = soup.get_text().replace("\n", " ").replace("\xa0", "").lower()
        text = re.sub(r"(\d),(\d)", r"\1\2", text)
        return text

    def _parse_metrics(self, text: str) -> int:
        """Get metrics from news text.

        Returns:
            The first ``regex["booster"]`` capture, passed through
            ``clean_count``.

        Raises:
            TypeError: If the booster sentence is not found in ``text``.
        """
        match = re.search(self.regex["booster"], text)
        if not match:
            raise TypeError(("Website Structure Changed, please update the script"))
        count = match.group(1)
        return clean_count(count)