Making asynchronous requests
The first thing to note is that the response's pagination varies according to the request's parameters. So first, let's write down the parameters that will compose the call URL and get the pagination for this request.
In [1]:
url = "https://api.mosqlimate.org/api/datastore/infodengue/?"
parameters = {
"per_page": 100,
"disease": "dengue",
"start": "2022-01-01",
"end": "2023-01-01"
# Optional parameters are included here
}
def compose_url(base_url: str, parameters: dict, page: int = 1) -> str:
"""Helper method to compose the API url with parameters"""
url = base_url + "?" if not base_url.endswith("?") else base_url
params = "&".join([f"{p}={v}" for p,v in parameters.items()]) + f"&page={page}"
return url + params
url = "https://api.mosqlimate.org/api/datastore/infodengue/?"
parameters = {
"per_page": 100,
"disease": "dengue",
"start": "2022-01-01",
"end": "2023-01-01"
# Optional parameters are included here
}
def compose_url(base_url: str, parameters: dict, page: int = 1) -> str:
"""Helper method to compose the API url with parameters"""
url = base_url + "?" if not base_url.endswith("?") else base_url
params = "&".join([f"{p}={v}" for p,v in parameters.items()]) + f"&page={page}"
return url + params
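For reference, compose_url simply joins the base URL with the query string, so the first page of this request resolves to the URL below (a quick sanity check, no API call involved):

print(compose_url(url, parameters))
# https://api.mosqlimate.org/api/datastore/infodengue/?per_page=100&disease=dengue&start=2022-01-01&end=2023-01-01&page=1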
In [2]:
import requests
pagination = requests.get(compose_url(url, parameters)).json()['pagination']
pagination
Out[2]:
{'items': 100, 'total_items': 294733, 'page': 1, 'total_pages': 2948, 'per_page': 100}
To get all the data for this request, it would be necessary to loop through all 2948 pages, which would take a (long) while if done synchronously. It's also important to note that adding filters helps reduce the time needed to fetch the data; please refer to the documentation for the parameters specific to each request.
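For comparison, the synchronous approach would look like the sketch below (illustrative only, not meant to be run here), issuing one blocking request per page:

# Synchronous sketch: one blocking request per page (slow for ~3000 pages)
all_items = []
for page in range(1, pagination["total_pages"] + 1):
    resp = requests.get(compose_url(url, parameters, page)).json()
    all_items.extend(resp["items"])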
In [3]:
import time
import aiohttp
import asyncio
In [4]:
async def fetch_data(session: aiohttp.ClientSession, url: str):
    """Uses ClientSession to create the async call to the API"""
    async with session.get(url) as response:
        return await response.json()


async def attempt_delay(session: aiohttp.ClientSession, url: str):
    """The request may fail. This method adds a delay to the failing requests"""
    try:
        return await fetch_data(session, url)
    except Exception:
        await asyncio.sleep(0.2)  # Wait briefly, then retry the same URL
        return await attempt_delay(session, url)
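Note that attempt_delay as written retries forever if the API keeps failing. A slightly more defensive variant, sketched here with a hypothetical max_retries argument, gives up after a fixed number of attempts:

async def attempt_delay_capped(session: aiohttp.ClientSession, url: str, max_retries: int = 5):
    """Like attempt_delay, but gives up after max_retries failed attempts"""
    for attempt in range(max_retries):
        try:
            return await fetch_data(session, url)
        except Exception:
            await asyncio.sleep(0.2 * (attempt + 1))  # Back off a bit more each time
    raise RuntimeError(f"Failed to fetch {url} after {max_retries} attempts")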
In [5]:
async def get(base_url: str, parameters: dict) -> list:
    st = time.time()
    result = []
    tasks = []
    async with aiohttp.ClientSession() as session:
        url = compose_url(base_url, parameters)
        data = await attempt_delay(session, url)
        total_pages = data["pagination"]["total_pages"]
        result.extend(data["items"])
        # Page 1 was already fetched above; schedule the remaining pages concurrently
        for page in range(2, total_pages + 1):
            url = compose_url(base_url, parameters, page)
            tasks.append(attempt_delay(session, url))
        responses = await asyncio.gather(*tasks)
        for resp in responses:
            result.extend(resp["items"])
    et = time.time()
    print(f"Took {et-st:.6f} seconds")
    return result
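One caveat: this schedules all ~2948 page requests at once, which can overwhelm the API or run into rate limits. A common refinement (a sketch, with an assumed cap of 50 concurrent requests) is to gate each task with an asyncio.Semaphore:

semaphore = asyncio.Semaphore(50)  # allow at most 50 requests in flight at once

async def attempt_delay_limited(session: aiohttp.ClientSession, url: str):
    """Wraps attempt_delay so only a bounded number of requests run concurrently"""
    async with semaphore:
        return await attempt_delay(session, url)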
In [6]:
data = await get(url, parameters)
Took 11.549963 seconds
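The bare await above works because Jupyter notebooks already run inside an event loop. In a standalone Python script, the coroutine needs to be driven explicitly:

# In a plain script (outside Jupyter), drive the coroutine with asyncio.run
data = asyncio.run(get(url, parameters))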
In [7]:
data[0]
Out[7]:
{'data_iniSE': '2023-01-01', 'SE': 202301, 'casos_est': 2.0, 'casos_est_min': 2, 'casos_est_max': 2, 'casos': 2, 'municipio_geocodigo': 3200359, 'p_rt1': 0.9779825, 'p_inc100k': 25.400051, 'Localidade_id': 0, 'nivel': 1, 'id': 320035920230119571, 'versao_modelo': '2023-08-02', 'Rt': 18.521618, 'municipio_nome': 'Alto Rio Novo', 'pop': 7874.0, 'tempmin': 23.4285714285714, 'umidmax': 92.6344071428571, 'receptivo': 1, 'transmissao': 0, 'nivel_inc': 0, 'umidmed': 83.1366927142857, 'umidmin': 68.8928015714286, 'tempmed': 25.6071428571429, 'tempmax': 29.0, 'casprov': None, 'casprov_est': None, 'casprov_est_min': None, 'casprov_est_max': None, 'casconf': None}
The data can now be transformed into a DataFrame for easier handling.
In [8]:
import pandas as pd
pd.DataFrame(data)
Out[8]:
|  | data_iniSE | SE | casos_est | casos_est_min | casos_est_max | casos | municipio_geocodigo | p_rt1 | p_inc100k | Localidade_id | ... | nivel_inc | umidmed | umidmin | tempmed | tempmax | casprov | casprov_est | casprov_est_min | casprov_est_max | casconf |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | 2023-01-01 | 202301 | 2.0 | 2 | 2 | 2 | 3200359 | 0.977982 | 25.400051 | 0 | ... | 0 | 83.136693 | 68.892802 | 25.607143 | 29.000000 | None | None | None | None | None |
| 1 | 2023-01-01 | 202301 | 0.0 | 0 | 0 | 0 | 2400802 | 0.000000 | 0.000000 | 0 | ... | 0 | NaN | NaN | NaN | NaN | None | None | None | None | None |
| 2 | 2023-01-01 | 202301 | 0.0 | 0 | 0 | 0 | 3201001 | 0.000000 | 0.000000 | 0 | ... | 0 | 83.136693 | 68.892802 | 25.607143 | 29.000000 | None | None | None | None | None |
| 3 | 2023-01-01 | 202301 | 0.0 | 0 | 0 | 0 | 2400406 | 0.000000 | 0.000000 | 0 | ... | 0 | 74.465981 | 62.051635 | 28.119047 | 30.714286 | None | None | None | None | None |
| 4 | 2023-01-01 | 202301 | 1.0 | 1 | 1 | 1 | 2401453 | 0.814528 | 3.478624 | 0 | ... | 0 | 74.465981 | 62.051635 | 28.119047 | 30.714286 | None | None | None | None | None |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 294728 | 2022-01-02 | 202201 | 1.0 | 1 | 1 | 1 | 5005004 | 0.957368 | 3.811266 | 0 | ... | 0 | 66.000329 | 51.599714 | 30.884943 | 34.142857 | None | None | None | None | None |
| 294729 | 2022-01-02 | 202201 | 0.0 | 0 | 0 | 0 | 4301073 | 0.500000 | 0.000000 | 0 | ... | 0 | 61.352217 | 34.925433 | 25.048617 | 32.500000 | None | None | None | None | None |
| 294730 | 2022-01-02 | 202201 | 0.0 | 0 | 0 | 0 | 3517109 | 0.000000 | 0.000000 | 0 | ... | 0 | 85.363760 | 69.530120 | 24.658340 | 28.400000 | None | None | None | None | None |
| 294731 | 2022-01-02 | 202201 | 0.0 | 0 | 0 | 0 | 4301206 | 0.000000 | 0.000000 | 0 | ... | 0 | 68.599086 | 47.522757 | 25.263214 | 30.571429 | None | None | None | None | None |
| 294732 | 2022-01-02 | 202201 | 3.0 | 3 | 3 | 3 | 3130101 | 0.617076 | 6.846658 | 0 | ... | 0 | 91.207938 | 76.650929 | 22.237682 | 25.428571 | None | None | None | None | None |
294733 rows × 30 columns
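From here, a typical next step (illustrative, not specific to the Mosqlimate API) is to parse the week start dates and sort the frame:

df = pd.DataFrame(data)
df["data_iniSE"] = pd.to_datetime(df["data_iniSE"])  # parse week start dates
df = df.sort_values(["municipio_geocodigo", "data_iniSE"])  # order by city, then week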