This commit is contained in:
esenke
2025-12-08 22:16:31 +08:00
commit 01adcfdf60
305 changed files with 50879 additions and 0 deletions

View File

@@ -0,0 +1,76 @@
import click
from rsi_download.download_async import download_core
import asyncio
@click.command()
@click.argument("x", type=click.STRING)
@click.argument("y", type=click.STRING)
@click.argument("z", type=click.STRING)
@click.argument("date_min", type=click.STRING)
@click.argument("date_max", type=click.STRING)
@click.option(
    "--username",
    "-u",
    type=click.STRING,
    help="Username for Copernicus Data Space Ecosystem",
)
@click.option(
    "--password", "-p", prompt=True, hide_input=True, confirmation_prompt=False
)
@click.option(
    "--api_key", "-k", prompt=True, hide_input=True, confirmation_prompt=False
)
@click.option(
    "--max",
    "-m",
    "max_",
    default=100,
    type=click.INT,
    show_default=True,
    help="maximum number of results returned",
)
@click.option(
    "--cloud-coverage",
    "-c",
    "cloud_coverage",
    default=10.00,
    type=click.FLOAT,
    show_default=True,
    # Typo fixed in the user-facing help text: "less then" -> "less than".
    help="Get only results with a cloud coverage percentage less than the argument given.",
)
@click.option(
    "--platform-name",
    "-n",
    "platform_name",
    default="S2",
    type=click.Choice(["S2", "S1", "WV3"]),
    show_default=True,
    help="Get only results with a platform name.",
)
@click.option(
    "--debug",
    default=False,
    is_flag=True,
    type=click.BOOL,
    show_default=True,
    help="Debug the http requests and extra debug logging",
)
@click.option(
    "--tci",
    default=False,
    is_flag=True,
    type=click.BOOL,
    show_default=True,
    help="Download only True Color Image (TCI)",
)
def main(x, y, z, date_min, date_max, username, password, api_key, max_, cloud_coverage, debug, tci, platform_name):
    """Search and download remote-sensing imagery for an XYZ map tile.

    X, Y and Z are slippy-map tile coordinates; DATE_MIN and DATE_MAX are
    YYYYMM strings delimiting the acquisition window.
    """
    # The whole pipeline is async; run it to completion on a fresh event loop.
    return asyncio.run(download_core(x, y, z, date_min, date_max, username, password, api_key, max_, cloud_coverage, debug, tci, platform_name))
# Script entry point: run the click command and exit quietly on Ctrl-C.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # "程序已终止" means "program terminated" (shown on Ctrl-C).
        print("\n程序已终止")

View File

@@ -0,0 +1,36 @@
import httpx
import msgspec
class CDSETokens(msgspec.Struct):
    """Copernicus Data Space Ecosystem token response (OpenID Connect token endpoint)."""
    # Bearer token sent in the Authorization header for catalogue/download calls.
    access_token: str
    # Token used to obtain a fresh access token when it expires.
    refresh_token: str
    # Lifetime of the access token, in seconds.
    expires_in: int
    # Lifetime of the refresh token, in seconds.
    refresh_expires_in: int
    # Typically "Bearer".
    token_type: str
    # The JSON key "not-before-policy" is not a valid identifier, hence the rename.
    not_before_policy: int = msgspec.field(name="not-before-policy")
    session_state: str
    scope: str
def get_access_token(username: str, password: str) -> CDSETokens:
    """Fetch an OAuth2 token set from the CDSE identity service.

    Uses the public ``cdse-public`` client with the resource-owner
    password grant.

    :param username: CDSE account username
    :param password: CDSE account password
    :return: decoded :class:`CDSETokens`
    :raises Exception: when the token endpoint returns an error status
    """
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
    }
    # Perform the request first and inspect the response afterwards: the old
    # code referenced `r` inside the except-handler, which raised
    # UnboundLocalError whenever the request itself failed before `r` was set.
    with httpx.Client() as client:
        r = client.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
        )
    try:
        r.raise_for_status()
    except httpx.HTTPStatusError as e:
        # Surface the server's JSON error body and chain the original error.
        raise Exception(
            f"Access token creation failed: {e}. Response from the server was: {r.json()}"
        ) from e
    return msgspec.json.decode(r.content, type=CDSETokens)

View File

@@ -0,0 +1,133 @@
from typing import Tuple, List
from rich.table import Table
from rich.console import Console
import re
import msgspec
from datetime import datetime
from rich.progress import (
BarColumn,
DownloadColumn,
Progress,
TextColumn,
TimeRemainingColumn,
TransferSpeedColumn,
)
from rsi_download.exceptions import InvalidWktPointArgument, InvalidDateRangeArgument
from rsi_download.download.search import SearchContent, SearchResult
class Preview(msgspec.Struct):
    """One row of the preview table shown before downloading."""
    # Sequential table id (stringified index into the search results).
    id: str
    # OData product id this row refers to.
    productid: str
    # Download/preview link taken from the product's first asset.
    url: str
    # Acquisition timestamp string (ContentDate start).
    origin_date: str
    # Product name, e.g. "*.SAFE".
    name: str
# Shared Rich progress renderer used by the download coroutines:
# filename, bar, percentage, downloaded size, transfer speed and ETA columns.
progress = Progress(
    TextColumn("[bold blue]{task.fields[filename]}", justify="right"),
    BarColumn(bar_width=None),
    "[progress.percentage]{task.percentage:>3.1f}%",
    "",
    DownloadColumn(),
    "",
    TransferSpeedColumn(),
    "",
    TimeRemainingColumn(),
)
# Example ESA timestamp: "2022-05-03T00:00:00.000Z"
# NOTE(review): %f renders 6-digit microseconds, not the 3-digit milliseconds
# shown in the example — confirm the catalogue accepts both forms.
ESA_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
def convert_to_timestamp(datestring="", dateformat="%d-%m-%Y %H:%M:%S") -> str:
    """Convert a ``dd-mm-YYYY [HH:MM:SS]`` string to an ESA/OData timestamp.

    Strings longer than 10 characters are parsed with *dateformat*
    (date + time); shorter ones as a bare ``%d-%m-%Y`` date.

    :param datestring: date or date-time string to convert
    :param dateformat: strptime format used for date-time input
    :return: timestamp like ``2023-08-11T00:00:00.000Z`` (millisecond
        precision, matching the literals used by ``download_core``)
    :raises ValueError: when *datestring* does not match the expected format
    """
    if len(datestring) > 10:
        source = datetime.strptime(datestring, dateformat)
    else:
        source = datetime.strptime(datestring, "%d-%m-%Y")
    # strftime's %f would emit 6-digit microseconds; the ESA examples and the
    # rest of this project use 3-digit milliseconds, so format the fractional
    # part explicitly.
    return f"{source.strftime('%Y-%m-%dT%H:%M:%S')}.{source.microsecond // 1000:03d}Z"
def daterange_to_timestamp(daterange: str) -> Tuple[str, str]:
    """Split a comma-separated daterange and convert both ends to ESA timestamps.

    :param daterange: e.g. ``"11-08-2023 00:00:00,11-09-2023 00:00:00"``
    :return: (time_gt, time_lt) timestamp strings
    :raises InvalidDateRangeArgument: on a missing comma or unparsable date
    """
    if "," not in daterange:
        raise InvalidDateRangeArgument(
            f'Give a valid daterange string. for example: "11-08-2023 00:00:00,11-09-2023 00:00:00" \n Daterange received: {daterange}'
        )
    gt, lt = daterange.split(",")
    # Convert both bounds with identical error handling.
    converted = {}
    for label, raw in (("time_gt", gt), ("time_lt", lt)):
        try:
            converted[label] = convert_to_timestamp(datestring=raw)
        except ValueError:
            raise InvalidDateRangeArgument(
                f"Invalid dateformat encountered for {label}: {raw}. Dateformat expected: %d-%m-%Y or %d-%m-%Y %H:%M:%S"
            )
    return converted["time_gt"], converted["time_lt"]
def wkt_to_point(wktstring: str) -> Tuple[float, ...]:
    """Parse a WKT POINT string into a tuple of two floats.

    :param wktstring: e.g. ``POINT(-9.1372 38.7000)``
    :return: tuple with the two numeric coordinates
    :raises InvalidWktPointArgument: when exactly two numbers are not found
    """
    # The previous pattern ([-+]?\d*\.\d+|\d+) dropped the sign on whole
    # numbers (POINT(-9 38) -> 9.0, 38.0); this one keeps the sign for both
    # integer and decimal coordinates.
    nums = re.findall(r"[-+]?\d*\.?\d+", wktstring)
    if len(nums) != 2:
        raise InvalidWktPointArgument(
            f"Give a valid WKT string. for example: POINT(-9.1372 38.7000). WKT received: {wktstring}"
        )
    return tuple(float(n) for n in nums)
def show_preview_urls(search_json: SearchContent, platform_name: str) -> List[Preview]:
    """
    Render a numbered table of search results with clickable preview links.

    :param search_json: SearchContent object holding the OData search results
    :param platform_name: platform short name; currently unused here but kept
        for interface compatibility with callers
    :return: list of Preview entries, one per search result, in table order
    """
    preview_urls = [
        Preview(
            id=str(i),
            productid=v.id,
            url=v.assets[0].download_link,
            origin_date=v.content_date.start,
            name=v.name,
        )
        for i, v in enumerate(search_json.value)
    ]
    # Plain string: the previous f-string had no placeholders.
    table = Table(title="RSI Preview Url's")
    table.add_column("ID", justify="left", style="magenta")
    table.add_column("Acquisition Time", justify="left", style="blue")
    table.add_column("Name", justify="left", style="magenta")
    for entry in preview_urls:
        # Parentheses would break Rich's [link=...] markup, so percent-encode them.
        table.add_row(
            entry.id,
            f'[link={entry.url.replace("(", "%28").replace(")", "%29")}]{entry.origin_date}[/link]',
            entry.name,
        )
    console = Console()
    console.print(table)
    return preview_urls
def get_selected_products(
    search_json: "SearchContent", preview_urls: List["Preview"], product_ids: str
) -> List["SearchResult"]:
    """
    Return the selected items from the search_json by the preview url id.

    :param search_json: SearchContent with the full search results
    :param preview_urls: List[Preview] shown to the user
    :param product_ids: iterable of preview ids; each element is str()-ed
        before comparison (note: a plain string is iterated per character)
    :return: List[SearchResult] in the order they appear in search_json
    """
    # Build sets once so the membership tests below are O(1) instead of
    # rescanning a list for every element.
    wanted_ids = {str(n) for n in product_ids}
    download_product_ids = {
        item.productid for item in preview_urls if item.id in wanted_ids
    }
    return [x for x in search_json.value if x.id in download_product_ids]

View File

@@ -0,0 +1,97 @@
import asyncio
from typing import List
import signal
import httpx
from rich.progress import TaskID, Event
from rsi_download.cli import progress
from rsi_download.download.search import SearchResult
from rsi_download.cli import Preview
import os
# Shared flag flipped by the SIGINT handler; the download loops poll it so an
# in-flight transfer can stop cleanly after the current chunk.
# NOTE(review): Event is imported from rich.progress above — confirm that
# module actually re-exports it (threading.Event is the usual source).
done_event = Event()
def handle_sigint(signum, frame):
    # Signature mandated by the signal module; we only record the stop request.
    done_event.set()
signal.signal(signal.SIGINT, handle_sigint)
async def download_tci_products_data(
    task_id: TaskID, product: SearchResult, access_token: str, mm_band: str = "R10m"
):
    """Download only the True Color Image (TCI) JP2 of a Sentinel-2 product.

    Walks the CDSE "zipper" Nodes API: product -> GRANULE -> IMG_DATA ->
    resolution band folder, finds the file whose name contains "TCI" and
    streams it to the current working directory while updating the shared
    Rich progress bar.

    :param task_id: progress task created by the caller
    :param product: search result to fetch the TCI from
    :param access_token: CDSE bearer token
    :param mm_band: resolution folder to look in (default "R10m")
    """
    headers = {"Authorization": f"Bearer {access_token}"}
    progress.start_task(task_id)
    async with httpx.AsyncClient() as client:
        client.headers.update(headers)
        # create the tci image url
        granule_url = f"https://zipper.dataspace.copernicus.eu/odata/v1/Products({product.id})/Nodes({product.name})/Nodes(GRANULE)/Nodes"
        granule_resp = await client.get(
            f"{granule_url}", follow_redirects=True, headers=headers
        )
        # Assumes the Nodes listing is returned under a "result" key with at
        # least one granule folder — TODO confirm against the zipper API.
        granule_folder = granule_resp.json()
        img_data_url = f"{granule_url}({granule_folder['result'][0]['Name']})/Nodes(IMG_DATA)/Nodes({mm_band})/Nodes"
        img_data_resp = await client.get(img_data_url, follow_redirects=True)
        img_data = img_data_resp.json()
        # First entry whose name contains "TCI"; raises IndexError if absent.
        tci_name = [img["Name"] for img in img_data["result"] if "TCI" in img["Name"]][
            0
        ]
        tci_url = f"{img_data_url}({tci_name})/$value"
        async with client.stream(
            method="GET",
            url=tci_url,
            headers=headers,
        ) as response:
            # Content-length is required here to size the progress bar.
            progress.update(task_id, total=int(response.headers["Content-length"]))
            with open(f"{tci_name}", "wb") as file:
                progress.start_task(task_id)
                async for chunk in response.aiter_bytes():
                    if chunk:
                        file.write(chunk)
                        progress.update(task_id, advance=len(chunk))
                    # Bail out between chunks once Ctrl-C was seen.
                    if done_event.is_set():
                        return
    progress.console.log(f"Downloaded {tci_name}")
async def download_data(task_id: TaskID, product: SearchResult, preview: Preview, access_token: str):
    """Stream a full product archive into ``out_raw/<name>.zip``.

    Updates the shared Rich progress bar per chunk and aborts between chunks
    once the SIGINT event is set.

    :param task_id: progress task created by the caller
    :param product: search result whose archive is fetched
    :param preview: preview row supplying the output file name
    :param access_token: CDSE bearer token
    """
    zip_name = preview.name.replace(".SAFE", ".zip")
    auth_header = {"Authorization": f"Bearer {access_token}"}
    product_url = f"https://zipper.dataspace.copernicus.eu/odata/v1/Products({product.id})/$value"
    async with httpx.AsyncClient() as client:
        client.headers.update(auth_header)
        async with client.stream(
            "GET",
            url=product_url,
            headers=auth_header,
        ) as response:
            # Size the bar from the server-reported length.
            progress.update(task_id, total=int(response.headers["Content-length"]))
            with open(f"out_raw/{zip_name}", "wb") as outfile:
                progress.start_task(task_id)
                async for chunk in response.aiter_bytes():
                    if chunk:
                        outfile.write(chunk)
                        progress.update(task_id, advance=len(chunk))
                    if done_event.is_set():
                        return
    progress.console.log(f"Downloaded {zip_name}")
async def download_products_data(
    products: List[SearchResult], previews: List[Preview], access_token: str, tci_only: bool = False
):
    """Create one progress task per product and download them concurrently.

    :param products: search results to download
    :param previews: matching preview rows (paired positionally with products)
    :param access_token: CDSE bearer token
    :param tci_only: download only the True Color Image instead of the archive
    """
    with progress:
        pending = []
        for item, meta in zip(products, previews):
            zip_name = meta.name.replace(".SAFE", ".zip")
            tid = progress.add_task(zip_name, filename=zip_name, start=False)
            # Pick the per-product coroutine; tasks start when streamed.
            coro = (
                download_tci_products_data(tid, item, access_token)
                if tci_only
                else download_data(tid, item, meta, access_token)
            )
            pending.append(coro)
        await asyncio.gather(*pending)

View File

@@ -0,0 +1,78 @@
from __future__ import annotations
from typing import List
import msgspec
import httpx
from rsi_download.exceptions import SearchException
from rsi_download.geo.geo_types import GeoJsonPolygon
ESA_SEARCH_URL = r"https://catalogue.dataspace.copernicus.eu/odata/v1/Products"
class ContentData(msgspec.Struct, rename="pascal"):
    """Odata search result start and end date (decoded from ``ContentDate``)."""
    # Acquisition window start timestamp string.
    start: str
    # Acquisition window end timestamp string.
    end: str
class Asset(msgspec.Struct, rename="pascal"):
    """Odata search Asset (one downloadable artifact of a product)."""
    # Explicit rename: "type" would shadow the builtin-style name, so the JSON
    # key "Type" maps onto type_.
    type_: str = msgspec.field(name="Type")
    id: str
    # HTTP download URL for this asset.
    download_link: str
    # S3-style path for the same asset.
    s3_path: str
class SearchResult(msgspec.Struct, rename="pascal"):
    """Odata search Result (one product returned by the catalogue)."""
    id: str
    # Product name, e.g. "*.SAFE".
    name: str
    # Size of the product archive in bytes.
    content_length: int
    origin_date: str
    s3_path: str
    # Acquisition start/end timestamps.
    content_date: ContentData
    # Product footprint as a GeoJSON polygon.
    geo_footprint: GeoJsonPolygon
    # Downloadable artifacts; populated because the query uses $expand=Assets.
    assets: List[Asset]
class SearchContent(msgspec.Struct):
    """Top-level OData search response: result list plus optional paging link."""
    # All products matching the query (up to $top).
    value: List[SearchResult]
    # Present when more pages of results are available.
    next_link: str | None = msgspec.field(default=None, name="@odata.nextLink")
def _build_search_filter(
    long: float,
    lat: float,
    cloud_coverage: float,
    time_lt: str,
    time_gt: str,
    platform_name: str,
) -> str:
    """Assemble the OData $filter expression for one platform.

    Filter operators: lt = less than, eq = equal to, gt = greater than.

    :raises ValueError: for an unknown platform name
    """
    # Point-intersection and acquisition-date clauses are common to every platform.
    point = f"OData.CSC.Intersects(area=geography'SRID=4326;POINT ({long:.4f} {lat:.4f})')"
    daterange = f"ContentDate/Start gt {time_gt} and ContentDate/Start lt {time_lt}"
    if platform_name == "S2":
        # Sentinel-2 Level-2A products, additionally constrained by cloud cover.
        attributes = (
            "Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq 'cloudCover'"
            f" and att/OData.CSC.DoubleAttribute/Value lt {cloud_coverage:.2f})"
            " and Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'productType'"
            " and att/OData.CSC.StringAttribute/Value eq 'S2MSI2A')"
        )
    elif platform_name == "S1":
        # Sentinel-1 IW GRD high-resolution products.
        attributes = (
            "Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'productType'"
            " and att/OData.CSC.StringAttribute/Value eq 'IW_GRDH_1S')"
        )
    elif platform_name == "WV3":
        # WorldView-3 products, selected by platform name attribute.
        attributes = (
            "Attributes/OData.CSC.StringAttribute/any(att:att/Name eq 'platformName'"
            " and att/OData.CSC.StringAttribute/Value eq 'WorldView-3')"
        )
    else:
        raise ValueError(f"Invalid platform name: {platform_name}")
    return f"{point} and {attributes} and {daterange}"
async def search_odata(
    long: float,
    lat: float,
    cloud_coverage: float,
    time_lt: str,
    time_gt: str,
    max_: int,
    platform_name: str,
) -> SearchContent:
    """Query the CDSE OData catalogue for products intersecting a point.

    :param long: longitude of the point of interest
    :param lat: latitude of the point of interest
    :param cloud_coverage: maximum cloud cover percentage (Sentinel-2 only)
    :param time_lt: upper bound for ContentDate/Start (exclusive)
    :param time_gt: lower bound for ContentDate/Start (exclusive)
    :param max_: maximum number of results ($top)
    :param platform_name: one of "S2", "S1", "WV3"
    :return: decoded SearchContent with assets expanded
    :raises ValueError: for an unknown platform name
    :raises SearchException: when the catalogue returns a non-200 status
    """
    search_filter = _build_search_filter(
        long, lat, cloud_coverage, time_lt, time_gt, platform_name
    )
    async with httpx.AsyncClient() as client:
        r = await client.get(
            url=f"{ESA_SEARCH_URL}?$filter={search_filter}&$top={max_}&$expand=Assets",
            timeout=60,
        )
        if r.status_code != 200:
            raise SearchException(f"Error getting data: {r.text}")
    return msgspec.json.decode(r.content, type=SearchContent)

View File

@@ -0,0 +1,100 @@
from typing import List, Tuple
import msgspec
import asyncio
from rich import print
from rsi_download.auth import get_access_token
from rsi_download.download.product import download_products_data
from rsi_download.cli import (
show_preview_urls,
Preview,
get_selected_products,
)
from rsi_download.download.search import search_odata
import math
async def download_core(
    x: str,
    y: str,
    z: str,
    date_min: str,
    date_max: str,
    username: str,
    password: str,
    api_key: str = None,
    max_: int = 100,
    cloud_coverage: float = 20.0,
    debug: bool = False,
    tci: bool = False,
    platform_name: str = "S2",
):
    """
    Search the CDSE catalogue for the tile's centre point and download all results.

    X tile x coordinate
    Y tile y coordinate
    Z zoom level
    DATE_MIN start date in format YYYYMM
    DATE_MAX end date in format YYYYMM

    username/password authenticate against CDSE.
    api_key is accepted but not used in this function — TODO confirm intent.
    max_ caps the number of search results; cloud_coverage filters S2 scenes;
    debug dumps the raw search response to search_data.json; tci downloads
    only the True Color Image instead of the full product archive.
    """
    # Tile centre in degrees; note the (lat, long) ordering.
    lat, long = tile_to_latlon(float(x), float(y), float(z))
    # Window start: first day of date_min's month, millisecond-precision Zulu.
    time_gt = f"{date_min[:4]}-{date_min[4:6]}-01T00:00:00.000Z"
    year = int(date_max[:4])
    month = int(date_max[4:])
    # Window end: first day of the month AFTER date_max, so the whole
    # date_max month is included (handles the December -> January rollover).
    if month == 12:
        next_year = year + 1
        next_month = 1
    else:
        next_year = year
        next_month = month + 1
    time_lt = f"{next_year}-{next_month:02d}-01T00:00:00.000Z"
    print(f"coordinates: lat: {lat:.4f}, long: {long:.4f}")
    print(f"maximum results: {max_}")
    print(f"cloud coverage percentage less then: {cloud_coverage:.2f}")
    print(f"time_gt: {time_gt}, time_lt: {time_lt}")
    search_data = await search_odata(long, lat, cloud_coverage, time_lt, time_gt, max_, platform_name)
    if debug:
        # Persist the raw OData response for offline inspection.
        print("DEBUG: Search request data is saved to disk.")
        with open("search_data.json", "wb") as f:
            f.write(msgspec.json.encode(search_data))
    preview_urls: List[Preview] = show_preview_urls(search_data, platform_name)
    print("start downloading all data ...")
    # Select every result: ids 0..N-1 match the preview table rows.
    products_to_download = get_selected_products(
        search_json=search_data, preview_urls=preview_urls, product_ids=list(range(len(preview_urls)))
    )
    tokens = get_access_token(username, password)
    try:
        # Downloads run one product at a time, in table order.
        for i, (product, preview) in enumerate(zip(products_to_download, preview_urls)):
            print(f"[{i+1}/{len(products_to_download)}] downloading {product.id} ...")
            # shield() keeps the in-flight download task alive if this outer
            # coroutine is cancelled — presumably so a file finishes cleanly;
            # TODO confirm that is the intended semantics.
            await asyncio.shield(download_products_data(
                [product], [preview], tokens.access_token, tci_only=tci
            ))
    except asyncio.CancelledError:
        print("\nDownload cancelled, exiting...")
        return
def tile_to_latlon(x: float, y: float, z: float, get_center: bool = True) -> Tuple[float, float]:
    """
    Convert XYZ (slippy-map / Web Mercator) tile coordinates to latitude/longitude.

    Annotations widened from ``int`` to ``float``: the caller passes
    ``float(x)`` etc., and fractional tile coordinates are valid input.

    Args:
        x: Tile X coordinate
        y: Tile Y coordinate
        z: Zoom level
        get_center: If True, returns the center point coordinates. If False, returns the top-left corner.

    Returns:
        Tuple of (latitude, longitude) in degrees
    """
    n = 2.0 ** z
    if get_center:
        # Offset half a tile to land on the tile centre instead of its corner.
        x += 0.5
        y += 0.5
    lon_deg = x / n * 360.0 - 180.0
    # Inverse Web Mercator projection for the latitude.
    lat_rad = math.atan(math.sinh(math.pi * (1 - 2 * y / n)))
    lat_deg = math.degrees(lat_rad)
    return lat_deg, lon_deg

View File

@@ -0,0 +1,16 @@
class InvalidWktPointArgument(Exception):
    """Raised when the WKT string is not a valid point."""
    # Redundant `pass` removed: the docstring alone is a valid class body.
class InvalidDateRangeArgument(Exception):
    """Raised when the daterange string is not valid."""
    # Redundant `pass` removed: the docstring alone is a valid class body.
class SearchException(Exception):
    """Raised when the search endpoint returned a non-200 status code."""
    # Redundant `pass` removed: the docstring alone is a valid class body.

View File

@@ -0,0 +1,13 @@
from typing import List
import msgspec
class Coordinate(msgspec.Struct):
    """A single longitude/latitude pair.

    NOTE(review): not referenced by GeoJsonPolygon below — appears unused in
    this view; confirm against the rest of the package before removing.
    """
    long: float
    lat: float
class GeoJsonPolygon(msgspec.Struct):
    """GeoJSON Polygon geometry as decoded from the product footprint."""
    # GeoJSON geometry type, e.g. "Polygon".
    type: str
    # List of linear rings; each ring is a list of [long, lat] positions.
    coordinates: List[List[List[float]]]

View File

@@ -0,0 +1,12 @@
def sort_by_cloudcover(search_result):
    """Extract each entry's cloud-cover percentage and return them sorted ascending.

    :param search_result: legacy OpenSearch-style response dict with a
        ``feed.entry[*].double`` list of name/content attribute pairs
    :return: ascending list of cloud cover percentages as floats
    :raises IndexError: if an entry has no ``cloudcoverpercentage`` attribute
    :raises KeyError: if the expected feed structure is missing
    """
    # One coverage value per entry; the [0] keeps the original behaviour of
    # raising IndexError when the attribute is absent.
    coverages = [
        [
            float(attr["content"])
            for attr in entry["double"]
            if attr["name"] == "cloudcoverpercentage"
        ][0]
        for entry in search_result["feed"]["entry"]
    ]
    # Values are already floats, so no key function is needed.
    return sorted(coverages)