From 21c11833f6872b62fed3843559aebe07e142ffa5 Mon Sep 17 00:00:00 2001 From: tobiaobr Date: Mon, 6 Oct 2025 09:39:48 +0200 Subject: [PATCH] Adjusted script to new demands Changed output coordinate system Added new attributes to the metadata - Accepted boolean flag - TVU - THU --- lasConverter.py | 36 +++++++++++++++++++++++++++++------- requirements.txt | 3 ++- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/lasConverter.py b/lasConverter.py index 39b01c1..52db308 100644 --- a/lasConverter.py +++ b/lasConverter.py @@ -4,11 +4,15 @@ import numpy as np import laspy import pandas as pd +from pyproj import Transformer -def CSV_2_LAS(surveys_folders, output_folder, output_name="merged_final.las", chunk_size_bytes="64MB", max_points_per_file=10_000_000): + +def CSV_2_LAS(surveys_folders, output_folder, output_name="merged_final1.las", chunk_size_bytes="64MB", max_points_per_file=10_000_000): surveys_path = Path(surveys_folders) output_folder = Path(output_folder) output_folder.mkdir(parents=True, exist_ok=True) + transformer = Transformer.from_crs("EPSG:25832", "EPSG:4978", always_xy=True) + csv_files = list(surveys_path.rglob("*.csv")) if not csv_files: @@ -22,10 +26,14 @@ def CSV_2_LAS(surveys_folders, output_folder, output_name="merged_final.las", ch pd.DataFrame(list(dir_to_id.items()), columns=["top_dir","id"]).to_csv(output_folder / "dir_mapping.csv", index=False) # Create LAS header template - header = laspy.LasHeader(point_format=3, version="1.2") - header.scales = (0.001, 0.001, 0.001) - header.offsets = (0.0, 0.0, 0.0) + header = laspy.LasHeader(point_format=6, version="1.4") + header.scales = (1, 1, 1) + header.offsets = (0, 0, 0) + # Add extra dimensions + header.add_extra_dim(laspy.ExtraBytesParams(name="accepted", type=np.uint8)) + header.add_extra_dim(laspy.ExtraBytesParams(name="TVU", type=np.float32)) + header.add_extra_dim(laspy.ExtraBytesParams(name="THU", type=np.float32)) temp_folder = output_folder / "tmp" 
temp_folder.mkdir(exist_ok=True) @@ -51,11 +59,24 @@ def open_new_chunk_file(): for f in csv_files: top_dir_id = dir_to_id[f.relative_to(surveys_path).parts[0]] # Read CSV in Dask partitions (out-of-core) - ddf = dd.read_csv(str(f), header=None, usecols=[0,1,2], blocksize=chunk_size_bytes) + ddf = dd.read_csv(str(f), header=None, usecols=[0,1,2,3,4,5], blocksize=chunk_size_bytes) for partition in ddf.to_delayed(): df = partition.compute() x, y, z = df.iloc[:,0].to_numpy(), df.iloc[:,1].to_numpy(), df.iloc[:,2].to_numpy() + + # Transform coordinates into EPSG:4978 + x, y, z = transformer.transform(x, y, z) + accepted = ( + df.iloc[:, 3].astype(str) + .str.strip() + .str.lower() + .eq("accepted") + .astype(np.uint8) + .to_numpy() + ) + tvu = df.iloc[:, 4].to_numpy(dtype=np.float32) + thu = df.iloc[:, 5].to_numpy(dtype=np.float32) ids = np.full(len(df), top_dir_id, dtype=np.uint16) points = laspy.ScaleAwarePointRecord.zeros(len(df), header=header) @@ -63,8 +84,9 @@ def open_new_chunk_file(): points.Y = y points.Z = z points.point_source_id = ids - points.return_number[:] = 1 - points.number_of_returns[:] = 1 + points["accepted"] = accepted + points["TVU"] = tvu + points["THU"] = thu # Split points into multiple LAS chunks if needed start_idx = 0 diff --git a/requirements.txt b/requirements.txt index a9016bf..a4eb13c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ pandas numpy laspy pathlib -dask[dataframe] \ No newline at end of file +dask[dataframe] +pyproj \ No newline at end of file