"""Convert survey CSV point clouds into a single merged LAS file.

Reads every ``*.csv`` under a surveys folder (x, y, z in the first three
columns, no header row), tags each point with a numeric ID derived from its
top-level survey directory, streams the data out-of-core via Dask into
size-bounded temporary LAS chunks, then merges the chunks into one LAS file.
"""

import sys
from pathlib import Path

import dask.dataframe as dd
import laspy
import numpy as np
import pandas as pd


def CSV_2_LAS(surveys_folders, output_folder, output_name="merged_final.las",
              chunk_size_bytes="64MB", max_points_per_file=10_000_000):
    """Convert all CSVs under *surveys_folders* into one merged LAS file.

    Parameters
    ----------
    surveys_folders : str or Path
        Root folder searched recursively for ``*.csv`` files. The first
        path component under this root identifies the survey; each survey
        gets a ``point_source_id`` (mapping written to ``dir_mapping.csv``).
    output_folder : str or Path
        Destination folder; created if missing. Temporary chunks go into a
        ``tmp`` subfolder that is removed on success.
    output_name : str
        File name of the final merged LAS.
    chunk_size_bytes : str or int
        Dask ``blocksize`` used to partition each CSV for out-of-core reads.
    max_points_per_file : int
        Upper bound on points per temporary LAS chunk file.

    Returns
    -------
    None. Side effects only: writes ``dir_mapping.csv`` and the merged LAS.
    """
    surveys_path = Path(surveys_folders)
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    csv_files = list(surveys_path.rglob("*.csv"))
    if not csv_files:
        print("No CSV files found.")
        return
    print(f"Found {len(csv_files)} CSV files.")

    # Map each top-level survey directory to a stable numeric ID (1-based)
    # and persist the mapping so IDs in the LAS can be traced back.
    top_dirs = sorted({f.relative_to(surveys_path).parts[0] for f in csv_files})
    dir_to_id = {name: i + 1 for i, name in enumerate(top_dirs)}
    pd.DataFrame(list(dir_to_id.items()), columns=["top_dir", "id"]).to_csv(
        output_folder / "dir_mapping.csv", index=False)

    # LAS header template: point format 3, LAS 1.2, millimetre precision.
    # NOTE(review): with offset 0 and scale 0.001 the int32 storage covers
    # roughly +/-2.1e6 coordinate units — confirm survey extents fit.
    header = laspy.LasHeader(point_format=3, version="1.2")
    header.scales = (0.001, 0.001, 0.001)
    header.offsets = (0.0, 0.0, 0.0)

    temp_folder = output_folder / "tmp"
    temp_folder.mkdir(exist_ok=True)

    # ---- Step 1: stream CSV partitions into bounded-size LAS chunks ----
    print("Step 1: Writing LAS chunks...")
    file_counter = 1
    points_in_file = 0
    writer = None

    def open_new_chunk_file():
        # Close the current chunk (if any) and start the next one.
        nonlocal file_counter, points_in_file, writer
        if writer:
            writer.close()
        las_path = temp_folder / f"{output_name}_chunk_{file_counter}.las"
        writer = laspy.open(str(las_path), mode="w", header=header)
        print(f"Writing chunk: {las_path}")
        points_in_file = 0
        file_counter += 1

    open_new_chunk_file()

    for f in csv_files:
        top_dir_id = dir_to_id[f.relative_to(surveys_path).parts[0]]
        # Read the CSV out-of-core; each Dask partition is materialized
        # one at a time so memory stays bounded by the blocksize.
        ddf = dd.read_csv(str(f), header=None, usecols=[0, 1, 2],
                          blocksize=chunk_size_bytes)

        for partition in ddf.to_delayed():
            df = partition.compute()
            x = df.iloc[:, 0].to_numpy()
            y = df.iloc[:, 1].to_numpy()
            z = df.iloc[:, 2].to_numpy()
            ids = np.full(len(df), top_dir_id, dtype=np.uint16)

            points = laspy.ScaleAwarePointRecord.zeros(len(df), header=header)
            # BUG FIX: use the lowercase *scaled* accessors. Capital X/Y/Z
            # are the raw int32 storage; assigning float coordinates to
            # them bypasses the 0.001 scale and truncates to integers,
            # corrupting every coordinate by a factor of 1000.
            points.x = x
            points.y = y
            points.z = z
            points.point_source_id = ids
            points.return_number[:] = 1
            points.number_of_returns[:] = 1

            # Spill across chunk files so none exceeds max_points_per_file.
            # An exactly-full final chunk leaves a trailing zero-point file;
            # that file merges as 0 points and is cleaned up below.
            start_idx = 0
            while start_idx < len(points):
                remaining_space = max_points_per_file - points_in_file
                end_idx = start_idx + remaining_space
                chunk = points[start_idx:end_idx]
                writer.write_points(chunk)
                points_in_file += len(chunk)
                start_idx = end_idx
                if points_in_file >= max_points_per_file:
                    open_new_chunk_file()

    if writer:
        writer.close()

    # ---- Step 2: merge the chunks into the final LAS ----
    print("Step 2: Merging LAS chunks into final LAS...")
    # BUG FIX: sort numerically by the chunk counter. A plain lexicographic
    # sort puts "_chunk_10" before "_chunk_2", merging points out of order
    # once there are more than nine chunks.
    las_files = sorted(temp_folder.glob(f"{output_name}_chunk_*.las"),
                       key=lambda p: int(p.stem.rsplit("_", 1)[-1]))
    first_las = laspy.read(str(las_files[0]))
    merged_header = first_las.header

    with laspy.open(str(output_folder / output_name), mode="w",
                    header=merged_header) as merged_writer:
        for f in las_files:
            las = laspy.read(str(f))
            merged_writer.write_points(las.points)
            print(f"Merged {f.name} ({len(las.points)} points)")

    # Cleanup temporary LAS files and the (now empty) tmp folder.
    for f in las_files:
        f.unlink()
    temp_folder.rmdir()

    print(f"Final merged LAS saved at {output_folder / output_name}")


if __name__ == "__main__":
    # Validate CLI usage instead of dying with a raw IndexError.
    if len(sys.argv) != 3:
        sys.exit("Usage: python lasConverter.py <surveys_folder> <output_folder>")
    CSV_2_LAS(sys.argv[1], sys.argv[2])