Updates:
- 2025/07/03 The previous script no longer functions correctly with recent wget releases. This updated version now reliably checks for newer files on the server before downloading, ensuring your local copies stay up to date.
The following is a Python script that downloads netCDF files provided by the reanalysis intercomparison dataset. It is left to the user to modify the script to download the desired data files.
Requirements: Python 3, wget
Those who wish to use OPeNDAP access can adapt this script to their needs; for every file, the script prints an OPeNDAP URL alongside the download URL.
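For illustration, the snippet below is a minimal sketch of remote access through one of those OPeNDAP URLs. It assumes the netCDF4 package is installed (it is not required by the download script itself), and the dataset path in the URL is a placeholder to be replaced with a real path printed by the script.

from netCDF4 import Dataset

# Placeholder URL: substitute one of the OPeNDAP URLs printed by the script
url = "https://www.jamstec.go.jp/RID/thredds/dodsC/<path-to-a-dataset>.nc"
ds = Dataset(url)           # open the remote dataset over OPeNDAP
print(ds.variables.keys())  # list the variables it provides
ds.close()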
Usage: Save the following lines of code into a file, such as RIDscript.py, and then execute it in a terminal with Python using the command “python RIDscript.py”.
# User input===========================================================================
destination = "myRID"  # Destination folder on the user's computer
# Selection of data holdings
# This is an example, not an exhaustive list
components = ["zonal", "single-level"]
temporal_resolutions = ["monthly"]
subcomponents = ["core"]
reanalyses = ["ERA5", "JRA-55"]
check_for_updated_files = False
# If True, the script checks whether a newer version of each file is available on the server
# If False, existing files are skipped (much faster)
# End of user input====================================================================
# Importing necessary modules
import os
import urllib.request
from xml.dom import minidom
import time
source = "https://www.jamstec.go.jp/RID/thredds/catalog/testAll/"
if not os.path.isdir(destination):
    print("Destination directory does not exist, creating")
    os.makedirs(destination)
# Shell commands are chained differently on Windows and on Unix-like systems
windows = os.name == "nt"
for component in components:
    for temporal_resolution in temporal_resolutions:
        for subcomponent in subcomponents:
            for reanalysis in reanalyses:
                # Build the THREDDS catalog URL for this selection of holdings
                src = (
                    source
                    + component
                    + "/common-grid/"
                    + temporal_resolution
                    + "/"
                    + subcomponent
                    + "/"
                    + reanalysis
                    + "/"
                )
                print(src)
                response = urllib.request.urlopen(src)
                if response.getcode() == 200:
                    print("Received response from server")
                    # Parse the catalog and collect its dataset entries
                    dom = minidom.parse(response)
                    files = dom.getElementsByTagName("dataset")
                    for file in files:
                        filename = file.attributes["name"].value
                        if ".nc" in filename:
                            urlPath = file.attributes["urlPath"].value
                            downloadpath = (
                                "https://www.jamstec.go.jp/RID/thredds/fileServer/"
                                + urlPath
                            )
                            opendappath = (
                                "https://www.jamstec.go.jp/RID/thredds/dodsC/" + urlPath
                            )
                            print("Download URL:", downloadpath)
                            print("OPeNDAP path:", opendappath)
                            # Download
                            file_input = downloadpath
                            file_output = (
                                destination + "/" + "/".join(file_input.split("/")[7:])
                            )
                            if os.path.isfile(file_output) and not check_for_updated_files:
                                print("File present on computer, skipping")
                            else:
                                time.sleep(2)
                                wget_command = (
                                    "wget --tries=20 --no-if-modified-since --timeout=30 -x --cut-dirs=4 -nH -N "
                                    + file_input
                                )
                                if not windows:
                                    wget_code = "cd " + destination + "; " + wget_command
                                else:
                                    wget_code = "cd " + destination + " && " + wget_command
                                print(wget_code)
                                success = False
                                tries = 1
                                maxtries = 100
                                while not success:
                                    # os.system returns the command's exit status:
                                    # 0 means wget succeeded, anything else is a failure
                                    if os.system(wget_code) == 0:
                                        success = True
                                    else:
                                        wait = 10 * tries
                                        print(
                                            "Could not connect to server after",
                                            tries,
                                            "tries. Waiting for",
                                            wait,
                                            "seconds until next attempt.",
                                        )
                                        time.sleep(wait)
                                        tries += 1
                                        if tries > maxtries:
                                            break
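If wget is not available, the os.system call above could be replaced by a download in pure Python. The helper below is only a rough sketch of such a substitution, not part of the official script; unlike wget -N, it performs no timestamp comparison, so existing files are simply overwritten.

import os
import urllib.request

def download_file(file_input, file_output):
    # Create the target directory if needed, then fetch the file directly
    os.makedirs(os.path.dirname(file_output), exist_ok=True)
    urllib.request.urlretrieve(file_input, file_output)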