init
This commit is contained in:
70
tools/pretraining_data_builder/rsi_pipeline/data_builder.py
Normal file
70
tools/pretraining_data_builder/rsi_pipeline/data_builder.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import lmdb
|
||||
import os
|
||||
import json
|
||||
from rich import print
|
||||
from rsi_download.download_async import download_core
|
||||
from rsi_process.adapter import process_adapter
|
||||
import asyncclick as click
|
||||
|
||||
@click.command()
|
||||
@click.argument("lmdb_path", type=click.STRING)
|
||||
async def read_lmdb_file(lmdb_path):
|
||||
"""
|
||||
Read the LMDB file and print all key-value pairs
|
||||
|
||||
Args:
|
||||
lmdb_path: LMDB file path
|
||||
"""
|
||||
if not os.path.exists(lmdb_path):
|
||||
print(f"Error: LMDB path '{lmdb_path}' does not exist")
|
||||
return
|
||||
|
||||
try:
|
||||
print(f"Reading Pretraining List from LMDB file from {lmdb_path}...")
|
||||
env = lmdb.open(lmdb_path, readonly=True)
|
||||
total_length = 0
|
||||
with env.begin() as txn:
|
||||
key = b'length'
|
||||
total_length = int(txn.get(key))
|
||||
print(f"Total length of the Pretraining Data: {total_length:,}")
|
||||
print("Example Data:")
|
||||
for i in range(10):
|
||||
print(txn.get(f"{i}".encode()).decode('utf-8'))
|
||||
for i in range(total_length):
|
||||
key = f"{i}".encode()
|
||||
data = json.loads(txn.get(key).decode('utf-8'))
|
||||
print("*"* 116 + "\n" + f"* Current Data [{i+1} / {total_length}]: {data} *" + "\n" + "*"* 116 )
|
||||
print(f"Downloading: {data}")
|
||||
await download_core(
|
||||
x=data['x'],
|
||||
y=data['y'],
|
||||
z=data['z'],
|
||||
date_min=data['date_min'],
|
||||
date_max=data['date_max'],
|
||||
username=os.getenv("USERNAME"),
|
||||
password=os.getenv("PASSWORD"),
|
||||
cloud_coverage=20.0,
|
||||
tci=False
|
||||
)
|
||||
print('-'* 40)
|
||||
print(f"Processing: {data}")
|
||||
process_list = os.listdir('out_raw/')
|
||||
total_len_process = len(process_list)
|
||||
for fn in process_list:
|
||||
print(f"Processing: {fn} [{i+1} / {total_len_process}]...")
|
||||
process_adapter(
|
||||
fn_img=f'out_raw/{fn}',
|
||||
save_dir='out_processed/',
|
||||
verbose=True,
|
||||
use_gcj02=False
|
||||
)
|
||||
print('-'* 40)
|
||||
print("Done!")
|
||||
|
||||
except lmdb.Error as e:
|
||||
print(f"Error reading LMDB file: {str(e)}")
|
||||
finally:
|
||||
env.close()
|
||||
|
||||
if __name__ == "__main__":
|
||||
read_lmdb_file()
|
||||
Reference in New Issue
Block a user