init
This commit is contained in:
158
tools/pretraining_data_builder/rsi_process/utils_s2.py
Normal file
158
tools/pretraining_data_builder/rsi_process/utils_s2.py
Normal file
@@ -0,0 +1,158 @@
|
||||
import xml.dom.minidom
|
||||
import os
|
||||
from glob import glob
|
||||
import zipfile
|
||||
from shapely import wkt
|
||||
import geopandas as gpd
|
||||
from osgeo import gdal
|
||||
import imageio.v2 as iio
|
||||
|
||||
def parse_metadata(meta_xml_file):
    """Read the catalogue fields out of a Sentinel-2 product metadata XML.

    Args:
        meta_xml_file: Metadata XML source handed straight to
            ``xml.dom.minidom.parse`` (a filename or an open file object).

    Returns:
        dict: On success, a dict with the keys ``product_uri``,
        ``sensing_start`` (first 10 characters of DATATAKE_SENSING_START),
        ``granule_path``, ``img_name``, ``cloud_cover``, ``cloud_shadow``,
        ``medium_clouds``, ``high_clouds`` and ``geom_wkt``. If anything
        fails while reading or parsing, the error is printed and an empty
        dict is returned instead.
    """
    def first_text(tag):
        # Text of the first child node of the first element with this tag.
        return dom.getElementsByTagName(tag)[0].firstChild.data

    try:
        dom = xml.dom.minidom.parse(meta_xml_file)

        # Sensing start: keep only the leading 10 characters of the value.
        sensing_start = first_text('DATATAKE_SENSING_START')[:10]

        product_uri = first_text('PRODUCT_URI')

        # The first IMAGE_FILE entry is a '/'-separated path; component 1 is
        # used as the granule directory and component 4 as the image file
        # stem, of which the first two '_'-separated tokens form the name.
        path_parts = first_text('IMAGE_FILE').split('/')
        granule_path = path_parts[1]
        name_tokens = path_parts[4].split('_')
        img_name = name_tokens[0] + '_' + name_tokens[1]

        # Footprint position list -> WKT string.
        geom_wkt = convert_footprint_to_wkt(first_text('EXT_POS_LIST'))

        # Cloud statistics, stored as floats.
        cloud_coverage = float(first_text('Cloud_Coverage_Assessment'))
        cloud_shadow = float(first_text('CLOUD_SHADOW_PERCENTAGE'))
        medium_clouds = float(first_text('MEDIUM_PROBA_CLOUDS_PERCENTAGE'))
        high_clouds = float(first_text('HIGH_PROBA_CLOUDS_PERCENTAGE'))

        return {
            'product_uri': product_uri,
            'sensing_start': sensing_start,
            'granule_path': granule_path,
            'img_name': img_name,
            'cloud_cover': cloud_coverage,
            'cloud_shadow': cloud_shadow,
            'medium_clouds': medium_clouds,
            'high_clouds': high_clouds,
            'geom_wkt': geom_wkt,
        }
    except Exception as e:
        # Best effort: report the problem and hand back an empty record.
        print(f'Failed to parse XML: {e}')
        return {}
|
||||
|
||||
def convert_footprint_to_wkt(footprint):
    """Convert a flat footprint position list into a MULTIPOLYGON WKT string.

    The input is a whitespace-separated list of values that are consumed in
    pairs. Each pair is written to the output with its two values swapped
    (second value first), and all pairs form a single ring.
    NOTE(review): presumably the input is "lat lon" ordered and the swap
    yields WKT "x y" (lon lat) order -- confirm against the metadata spec.

    Args:
        footprint: String such as ``"a1 b1 a2 b2 ..."``. Any run of
            whitespace (spaces, tabs, newlines) separates values.

    Returns:
        str: ``"MULTIPOLYGON (((b1 a1,b2 a2,...)))"``.

    Raises:
        ValueError: If the list is empty or holds an odd number of values.
    """
    # split() without an argument collapses repeated whitespace, so padded or
    # line-wrapped position lists do not produce empty tokens.
    values = footprint.split()
    if not values or len(values) % 2:
        raise ValueError(
            f"Footprint must contain a non-empty, even number of values, "
            f"got {len(values)}"
        )

    ring = ','.join(
        f"{second} {first}"
        for first, second in zip(values[0::2], values[1::2])
    )
    return f"MULTIPOLYGON ((({ring})))"
|
||||
|
||||
def zip2rec(fn_zip):
    """Build one catalogue record from a zipped Sentinel-2 product.

    The archive is expected to contain ``<id>.SAFE/MTD_MSIL2A.xml`` and
    ``<id>.SAFE/<id>-ql.jpg``, where ``<id>`` is the zip file name without
    its extension.

    Args:
        fn_zip: Path to the product ``.zip`` file.

    Returns:
        dict: The record produced by ``parse_metadata`` extended with
        ``geometry`` (the parsed ``geom_wkt``) and ``thumb`` (the quicklook
        image as read by ``imageio``).

    Raises:
        KeyError: If an expected member is missing from the archive, or if
            the metadata record has no ``geom_wkt`` entry (which is what an
            empty record from a failed parse looks like).
    """
    id_img = os.path.splitext(os.path.basename(fn_zip))[0]
    # ZIP member names always use '/' as separator, independent of the host
    # OS, so they are built explicitly instead of with os.path.join.
    safe_dir = f'{id_img}.SAFE'

    # Context managers make sure the archive and its member streams are
    # closed even when parsing fails.
    with zipfile.ZipFile(fn_zip, 'r') as archive:
        with archive.open(f'{safe_dir}/MTD_MSIL2A.xml') as fp_xml:
            rec = parse_metadata(fp_xml)

        rec['geometry'] = wkt.loads(rec['geom_wkt'])

        with archive.open(f'{safe_dir}/{id_img}-ql.jpg') as fp_thumb:
            rec['thumb'] = iio.imread(fp_thumb)

    return rec
|
||||
|
||||
def build_catalog(path, fn='catalog'):
    """Collect records for every ``S2*.zip`` in a directory into a catalogue.

    Args:
        path: Directory that is searched (non-recursively) for ``S2*.zip``.
        fn: Base name of the GeoJSON file to write inside ``path``, or
            ``None`` to skip writing.

    Returns:
        The path of the written ``<fn>.geojson`` file when ``fn`` is given,
        otherwise the GeoDataFrame itself. In both cases the ``geom_wkt``
        column has been dropped.
    """
    zip_paths = glob(os.path.join(path, 'S2*.zip'))
    records = [zip2rec(zip_path) for zip_path in zip_paths]

    catalog = gpd.GeoDataFrame(records, crs='EPSG:4326')
    catalog = catalog.drop(columns='geom_wkt')

    if fn is None:
        return catalog

    out_path = os.path.join(path, f"{fn}.geojson")
    catalog.to_file(out_path, driver='GeoJSON')
    return out_path
|
||||
|
||||
def make_full_name(rec, band):
    """Build the relative path of a band's JP2 image inside a product.

    The result has the form
    ``<product_uri>/GRANULE/<granule_path>/IMG_DATA/R<res>/<img_name>_<code>_<res>.jp2``
    joined with the host OS path separator.

    Args:
        rec: Mapping providing ``product_uri``, ``granule_path`` and
            ``img_name``.
        band: Band key, one of ``B2``, ``B3``, ``B4``, ``B8`` (10m),
            ``B5``, ``B6``, ``B7``, ``B8A``, ``B11``, ``B12``, ``SCL`` (20m).

    Returns:
        str: The relative file path of the requested band image.

    Raises:
        KeyError: If ``band`` is not a supported key or ``rec`` lacks one of
            the required entries.
    """
    # band key -> (band code used in the file name, resolution directory)
    band_table = {
        'B2': ('B02', '10m'),
        'B3': ('B03', '10m'),
        'B4': ('B04', '10m'),
        'B8': ('B08', '10m'),
        'B5': ('B05', '20m'),
        'B6': ('B06', '20m'),
        'B7': ('B07', '20m'),
        'B8A': ('B8A', '20m'),
        'B11': ('B11', '20m'),
        'B12': ('B12', '20m'),
        'SCL': ('SCL', '20m'),
    }
    try:
        band_code, resolution = band_table[band]
    except KeyError:
        # Same exception type as a plain lookup, but says what was expected.
        raise KeyError(
            f"Unsupported band {band!r}; expected one of {sorted(band_table)}"
        ) from None

    # The template is joined first and filled in afterwards so the record
    # values are inserted verbatim, whatever characters they contain.
    fn_template = os.path.join(
        '{product}', 'GRANULE',
        '{granule}', 'IMG_DATA', 'R{res}',
        '{img}_{code}_{res}.jp2',
    )
    return fn_template.format(
        product=rec['product_uri'],
        granule=rec['granule_path'],
        res=resolution,
        img=rec['img_name'],
        code=band_code,
    )
|
||||
|
||||
def warp(
    ds, outputBounds,
    outputBoundsSRS='EPSG:4326',
    xRes=10, yRes=10, targetAlignedPixels=True,
    **kwargs,
):
    """Warp a dataset with GDAL into a dataset using the "MEM" format.

    Args:
        ds: Source dataset, passed to ``gdal.Warp`` as-is.
        outputBounds: Output extent, forwarded to ``gdal.WarpOptions``.
        outputBoundsSRS: SRS in which ``outputBounds`` is expressed.
        xRes: Output resolution along x, forwarded to ``gdal.WarpOptions``.
        yRes: Output resolution along y, forwarded to ``gdal.WarpOptions``.
        targetAlignedPixels: Forwarded to ``gdal.WarpOptions``.
        **kwargs: Any further ``gdal.WarpOptions`` keyword arguments.

    Returns:
        Whatever ``gdal.Warp`` returns for the given options.
    """
    # An empty destination name is used together with format="MEM".
    return gdal.Warp(
        '',
        ds,
        options=gdal.WarpOptions(
            format="MEM",
            outputBounds=outputBounds,
            outputBoundsSRS=outputBoundsSRS,
            xRes=xRes,
            yRes=yRes,
            targetAlignedPixels=targetAlignedPixels,
            **kwargs,
        ),
    )
|
||||
|
||||
def get_ndarray(
    ds, outputBounds,
    outputBoundsSRS='EPSG:4326',
    xRes=10, yRes=10, targetAlignedPixels=True,
    **kwargs,
):
    """Warp a dataset to the requested bounds and return its pixel array.

    Args:
        ds: Source dataset, passed on to ``warp``.
        outputBounds: Output extent, passed on to ``warp``.
        outputBoundsSRS: SRS in which ``outputBounds`` is expressed.
        xRes: Output resolution along x.
        yRes: Output resolution along y.
        targetAlignedPixels: Passed on to ``warp``.
        **kwargs: Any further keyword arguments accepted by ``warp``.

    Returns:
        The result of ``ReadAsArray()`` on the warped dataset.
    """
    # Forward the caller's settings; hard-coding the defaults here would
    # silently ignore any non-default SRS, resolution or alignment.
    ds_warp = warp(
        ds, outputBounds,
        outputBoundsSRS=outputBoundsSRS,
        xRes=xRes, yRes=yRes, targetAlignedPixels=targetAlignedPixels,
        **kwargs,
    )
    arr = ds_warp.ReadAsArray()
    # Drop the reference to the warped dataset once the array is extracted.
    ds_warp = None
    return arr
|
||||
|
||||
Reference in New Issue
Block a user