refactor(#3): clean up code
AdrianSolberg committed Oct 23, 2025
1 parent a4059ab commit ffe92c6
Showing 1 changed file with 36 additions and 37 deletions.
73 changes: 36 additions & 37 deletions lasConverter.py
@@ -13,29 +13,31 @@
from shapely.ops import transform
from shapely import wkt

-def prepare_cells(metadata):
-    # Relevant level 1 H3 cells
-    level1_cells = pd.read_csv("metadata/level1_h3_cells.csv")["cell_id"].tolist()
+BASE_PATH = Path(".")
+METADATA_PATH = BASE_PATH / "metadata"
+SURVEYS_PATH = BASE_PATH / "surveys"
+CONVERTED_PATH = BASE_PATH / "converted_surveys"
+H3_CELLS_PATH = BASE_PATH / "h3_cells"

-    # Transformer for polygons
-    transformer = Transformer.from_crs("EPSG:4326", "EPSG:4978", always_xy=True)
+def prepare_cells(metadata, level1_cells):
+    # Transformer for h3 cell polygons
+    transformer_4326_to_4978 = Transformer.from_crs("EPSG:4326", "EPSG:4978", always_xy=True)

    # Generate level 2 cells & polygons
    level2_cells = []
    cell_polygons = {}
    for c1 in level1_cells:
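        # cell_to_children(c1, 2) returns the resolution-2 descendants of each level-1 cell (7 per hexagon in H3's aperture-7 hierarchy)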
        children = h3.cell_to_children(c1, 2)
        for c2 in children:
            coords = h3.cell_to_boundary(c2)
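            # h3 returns boundary vertices as (lat, lng); Shapely expects (x, y) = (lng, lat), hence the swap below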
            poly = Polygon([(lng, lat) for lat, lng in coords])
-            cell_polygons[c2] = transform(transformer.transform, poly)
            level2_cells.append(c2)
+            cell_polygons[c2] = transform(transformer_4326_to_4978.transform, poly)

    # Transform survey polygons from EPSG:4258 to EPSG:4978
    metadata["geom"] = metadata["geom"].apply(wkt.loads)
    transform_4258_to_4978 = Transformer.from_crs("EPSG:4258", "EPSG:4978", always_xy=True)
    metadata["geom_4978"] = metadata["geom"].apply(lambda g: transform(transform_4258_to_4978.transform, g))

-    # Assign surveys to cells
+    # Assign surveys to cells based on overlap
    cell_to_surveys = defaultdict(list)
    for c2, cell_poly in cell_polygons.items():
        for _, survey in metadata.iterrows():
@@ -52,19 +54,17 @@ def prepare_cells(metadata):

    return cell_to_surveys, cell_polygons

-def CSV_2_LAS(surveys_folders, output_folder, metadata, chunk_size_bytes="64MB"):
-    surveys_path = Path(surveys_folders)
-    output_folder = Path(output_folder)
-    output_folder.mkdir(parents=True, exist_ok=True)
-    temp_folder = output_folder / "tmp"
+def CSV_2_LAS(metadata, chunk_size_bytes="64MB"):
+    CONVERTED_PATH.mkdir(parents=True, exist_ok=True)
+    temp_folder = CONVERTED_PATH / "tmp"
    temp_folder.mkdir(exist_ok=True)

    # Set EPSG:25832 as default when crs is missing
    metadata["epsg"] = metadata["epsg"].fillna(25832).astype(int)

    for source_id, row in metadata.iterrows():
        survey_name = row["survey_name"]
-        survey_folder = surveys_path / survey_name
+        survey_folder = SURVEYS_PATH / survey_name

        if not survey_folder.exists() or not survey_folder.is_dir():
            print(f"⚠️ Survey folder '{survey_folder}' does not exist, skipping.")
@@ -76,9 +76,11 @@ def CSV_2_LAS(surveys_folders, output_folder, metadata, chunk_size_bytes="64MB")
            continue

epsg = row["epsg"]
transformer = Transformer.from_crs(f"EPSG:{epsg}", "EPSG:4978", always_xy=True)
transformer_to_4978 = Transformer.from_crs(f"EPSG:{epsg}", "EPSG:4978", always_xy=True)
file_counter = 1

print(f"Writing LAS chunks for {survey_name}...")

for csv_file in csv_files:
# Read CSV in Dask partitions (out-of-core)
ddf = dd.read_csv(str(csv_file), header=None, blocksize=chunk_size_bytes)
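            # blocksize="64MB" caps each Dask partition at roughly 64 MB of CSV text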
@@ -88,8 +90,9 @@ def CSV_2_LAS(surveys_folders, output_folder, metadata, chunk_size_bytes="64MB")
                x, y, z = df.iloc[:,0].to_numpy(), df.iloc[:,1].to_numpy(), df.iloc[:,2].to_numpy()

                # Transform coordinates into EPSG:4978
-                x, y, z = transformer.transform(x, y, z)
+                x, y, z = transformer_to_4978.transform(x, y, z)

+                # Encode 'Accepted' to 0 and 1
                accepted = (
                    df.iloc[:, 3].astype(str)
                    .str.strip()
@@ -103,6 +106,7 @@ def CSV_2_LAS(surveys_folders, output_folder, metadata, chunk_size_bytes="64MB")
                tvu = df.iloc[:, 4].to_numpy(dtype=np.float32) if df.shape[1] > 4 else np.zeros(len(df), dtype=np.float32)
                thu = df.iloc[:, 5].to_numpy(dtype=np.float32) if df.shape[1] > 5 else np.zeros(len(df), dtype=np.float32)
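                # TVU/THU (total vertical / horizontal uncertainty) are zero-filled when the CSV lacks those columns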

+                # Set source_id
                ids = np.full(len(df), source_id, dtype=np.uint16)

                # Create LAS header template
@@ -118,7 +122,7 @@ def CSV_2_LAS(surveys_folders, output_folder, metadata, chunk_size_bytes="64MB")
                las.y = y
                las.z = z

-                # Add extra dimensions
+                # Set extra dimensions
                las["accepted"] = accepted
                las["TVU"] = tvu
                las["THU"] = thu
@@ -130,12 +134,12 @@ def CSV_2_LAS(surveys_folders, output_folder, metadata, chunk_size_bytes="64MB")
                file_counter += 1

        # Merging chunked LAS files into single LAS
-        print("Step 2: Merging LAS chunks into final LAS...")
+        print("Merging LAS chunks into final LAS...")
        las_files = sorted(temp_folder.glob(f"{survey_name}_chunk_*.las"))
        first_las = laspy.read(str(las_files[0]))
        merged_header = first_las.header
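        # assumes all chunks share the first chunk's point format and scales, so its header can serve as the merged header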

-        with laspy.open(str(output_folder / f"{survey_name}.las"), mode="w", header=merged_header) as merged_writer:
+        with laspy.open(str(CONVERTED_PATH / f"{survey_name}.las"), mode="w", header=merged_header) as merged_writer:
            for f in las_files:
                las = laspy.read(str(f))
                merged_writer.write_points(las.points)
@@ -149,18 +153,14 @@ def CSV_2_LAS(surveys_folders, output_folder, metadata, chunk_size_bytes="64MB")


def group_by_h3_cell(cell_to_surveys, cell_polygons):
-    output_folder = Path("h3_cells")
-    output_folder.mkdir(exist_ok=True)
-
-    temp_folder_base = Path("temp_cells") # temporary folder base
-    temp_folder_base.mkdir(exist_ok=True)
+    H3_CELLS_PATH.mkdir(exist_ok=True)

    for c2, survey_fids in cell_to_surveys.items():
        if not survey_fids:
            continue

        # Create a temporary folder for this H3 cell
-        temp_folder = temp_folder_base / c2
+        temp_folder = H3_CELLS_PATH / f"tmp_{c2}"
        temp_folder.mkdir(exist_ok=True)

        input_files = []
@@ … @@ def group_by_h3_cell(cell_to_surveys, cell_polygons):

        # PDAL filename glob pattern
        input_pattern = str(temp_folder / "*.las")
+
        cell_poly = cell_polygons[c2]

        # PDAL JSON pipeline
@@ … @@ def group_by_h3_cell(cell_to_surveys, cell_polygons):
            },
            {
                "type": "writers.las",
-                "filename": str(output_folder / f"{c2}.las"),
+                "filename": str(H3_CELLS_PATH / f"{c2}.las"),
                "extra_dims": "Accepted=uint8,TVU=float32,THU=float32"
            }
        ]
@@ … @@ def group_by_h3_cell(cell_to_surveys, cell_polygons):
        p = pdal.Pipeline(pipeline_json)
        try:
            p.execute_streaming(chunk_size=500000)
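            # streaming execution processes points in 500k chunks instead of loading the whole cell into memory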
print(f"Written H3 cell {c2}.")
except KeyboardInterrupt:
print("Keyboard interrupt detected, cleaning up and exiting...")
break
except RuntimeError as e:
print(f"Pipeline failed for cell {c2}: {e}")
print(f"✅ Written H3 cell {c2}.")
except Exception as e:
print(f"⚠️ Pipeline failed for cell {c2}: {e}")
finally:
shutil.rmtree(temp_folder)

if __name__ == "__main__":
-    metadata = pd.read_csv("./metadata/metadata.csv") # includes fid, survey_name, survey_area, geom, epsg
+    metadata = pd.read_csv(METADATA_PATH / 'metadata.csv') # includes fid, survey_name, survey_area, geom, epsg
+    level1_cells = pd.read_csv(METADATA_PATH / "level1_h3_cells.csv")["cell_id"].tolist() # Relevant level 1 H3 cells

-    cell_to_surveys, cell_polygons = prepare_cells(metadata)
+    cell_to_surveys, cell_polygons = prepare_cells(metadata, level1_cells)

-    CSV_2_LAS("surveys", "converted_surveys", metadata)
+    CSV_2_LAS(metadata)

-    group_by_h3_cell(cell_to_surveys, cell_polygons)
+    group_by_h3_cell(cell_to_surveys, cell_polygons)
