"""Convert survey CSV point clouds into a single merged LAS file.

Reads every ``*.csv`` under a surveys folder (x, y, z in the first three
columns, no header row), tags each point with a numeric ID derived from its
top-level survey directory, streams the data out-of-core via Dask into
size-bounded temporary LAS chunks, then merges the chunks into one LAS file.
"""

import sys
from pathlib import Path

import dask.dataframe as dd
import laspy
import numpy as np
import pandas as pd


def CSV_2_LAS(surveys_folders, output_folder, output_name="merged_final.las",
              chunk_size_bytes="64MB", max_points_per_file=10_000_000):
    """Convert all CSVs under *surveys_folders* into one merged LAS file.

    Parameters
    ----------
    surveys_folders : str or Path
        Root folder searched recursively for ``*.csv`` files. The first
        path component under this root identifies the survey; each survey
        gets a ``point_source_id`` (mapping written to ``dir_mapping.csv``).
    output_folder : str or Path
        Destination folder; created if missing. Temporary chunks go into a
        ``tmp`` subfolder that is removed on success.
    output_name : str
        File name of the final merged LAS.
    chunk_size_bytes : str or int
        Dask ``blocksize`` used to partition each CSV for out-of-core reads.
    max_points_per_file : int
        Upper bound on points per temporary LAS chunk file.

    Returns
    -------
    None. Side effects only: writes ``dir_mapping.csv`` and the merged LAS.
    """
    surveys_path = Path(surveys_folders)
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    csv_files = list(surveys_path.rglob("*.csv"))
    if not csv_files:
        print("No CSV files found.")
        return
    print(f"Found {len(csv_files)} CSV files.")

    # Map each top-level survey directory to a stable numeric ID (1-based)
    # and persist the mapping so IDs in the LAS can be traced back.
    top_dirs = sorted({f.relative_to(surveys_path).parts[0] for f in csv_files})
    dir_to_id = {name: i + 1 for i, name in enumerate(top_dirs)}
    pd.DataFrame(list(dir_to_id.items()), columns=["top_dir", "id"]).to_csv(
        output_folder / "dir_mapping.csv", index=False)

    # LAS header template: point format 3, LAS 1.2, millimetre precision.
    # NOTE(review): with offset 0 and scale 0.001 the int32 storage covers
    # roughly +/-2.1e6 coordinate units — confirm survey extents fit.
    header = laspy.LasHeader(point_format=3, version="1.2")
    header.scales = (0.001, 0.001, 0.001)
    header.offsets = (0.0, 0.0, 0.0)

    temp_folder = output_folder / "tmp"
    temp_folder.mkdir(exist_ok=True)

    # ---- Step 1: stream CSV partitions into bounded-size LAS chunks ----
    print("Step 1: Writing LAS chunks...")
    file_counter = 1
    points_in_file = 0
    writer = None

    def open_new_chunk_file():
        # Close the current chunk (if any) and start the next one.
        nonlocal file_counter, points_in_file, writer
        if writer:
            writer.close()
        las_path = temp_folder / f"{output_name}_chunk_{file_counter}.las"
        writer = laspy.open(str(las_path), mode="w", header=header)
        print(f"Writing chunk: {las_path}")
        points_in_file = 0
        file_counter += 1

    open_new_chunk_file()

    for f in csv_files:
        top_dir_id = dir_to_id[f.relative_to(surveys_path).parts[0]]
        # Read the CSV out-of-core; each Dask partition is materialized
        # one at a time so memory stays bounded by the blocksize.
        ddf = dd.read_csv(str(f), header=None, usecols=[0, 1, 2],
                          blocksize=chunk_size_bytes)

        for partition in ddf.to_delayed():
            df = partition.compute()
            x = df.iloc[:, 0].to_numpy()
            y = df.iloc[:, 1].to_numpy()
            z = df.iloc[:, 2].to_numpy()
            ids = np.full(len(df), top_dir_id, dtype=np.uint16)

            points = laspy.ScaleAwarePointRecord.zeros(len(df), header=header)
            # BUG FIX: use the lowercase *scaled* accessors. Capital X/Y/Z
            # are the raw int32 storage; assigning float coordinates to
            # them bypasses the 0.001 scale and truncates to integers,
            # corrupting every coordinate by a factor of 1000.
            points.x = x
            points.y = y
            points.z = z
            points.point_source_id = ids
            points.return_number[:] = 1
            points.number_of_returns[:] = 1

            # Spill across chunk files so none exceeds max_points_per_file.
            # An exactly-full final chunk leaves a trailing zero-point file;
            # that file merges as 0 points and is cleaned up below.
            start_idx = 0
            while start_idx < len(points):
                remaining_space = max_points_per_file - points_in_file
                end_idx = start_idx + remaining_space
                chunk = points[start_idx:end_idx]
                writer.write_points(chunk)
                points_in_file += len(chunk)
                start_idx = end_idx
                if points_in_file >= max_points_per_file:
                    open_new_chunk_file()

    if writer:
        writer.close()

    # ---- Step 2: merge the chunks into the final LAS ----
    print("Step 2: Merging LAS chunks into final LAS...")
    # BUG FIX: sort numerically by the chunk counter. A plain lexicographic
    # sort puts "_chunk_10" before "_chunk_2", merging points out of order
    # once there are more than nine chunks.
    las_files = sorted(temp_folder.glob(f"{output_name}_chunk_*.las"),
                       key=lambda p: int(p.stem.rsplit("_", 1)[-1]))
    first_las = laspy.read(str(las_files[0]))
    merged_header = first_las.header

    with laspy.open(str(output_folder / output_name), mode="w",
                    header=merged_header) as merged_writer:
        for f in las_files:
            las = laspy.read(str(f))
            merged_writer.write_points(las.points)
            print(f"Merged {f.name} ({len(las.points)} points)")

    # Cleanup temporary LAS files and the (now empty) tmp folder.
    for f in las_files:
        f.unlink()
    temp_folder.rmdir()

    print(f"Final merged LAS saved at {output_folder / output_name}")


if __name__ == "__main__":
    # Validate CLI usage instead of dying with a raw IndexError.
    if len(sys.argv) != 3:
        sys.exit("Usage: python lasConverter.py <surveys_folder> <output_folder>")
    CSV_2_LAS(sys.argv[1], sys.argv[2])