-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit f780c67
Showing 4 changed files with 113 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| #survey content | ||
| /surveys/* | ||
| /output_las/* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
|
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,104 @@ | ||
| import sys | ||
| from pathlib import Path | ||
| import dask.dataframe as dd | ||
| import numpy as np | ||
| import laspy | ||
| import pandas as pd | ||
|
|
||
def CSV_2_LAS(surveys_folders, output_folder, output_name="merged_final.las",
              chunk_size_bytes="64MB", max_points_per_file=10_000_000):
    """Merge every CSV point file under *surveys_folders* into one LAS file.

    Each CSV is expected to hold X, Y, Z coordinates in its first three
    columns with no header row.  The top-level directory a CSV lives in is
    mapped to a numeric id stored in the LAS ``point_source_id`` field, and
    the mapping is written to ``dir_mapping.csv`` in *output_folder*.

    Parameters
    ----------
    surveys_folders : str | Path
        Root folder scanned recursively for ``*.csv`` files.
    output_folder : str | Path
        Destination folder (created if missing).  Temporary chunk files go
        into a ``tmp`` subfolder and are removed after the merge.
    output_name : str
        File name of the final merged LAS.
    chunk_size_bytes : str
        Dask ``blocksize`` used to read each CSV out-of-core.
    max_points_per_file : int
        Maximum number of points per temporary LAS chunk file.
    """
    surveys_path = Path(surveys_folders)
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    csv_files = list(surveys_path.rglob("*.csv"))
    if not csv_files:
        print("No CSV files found.")
        return
    print(f"Found {len(csv_files)} CSV files.")

    # Map each top-level survey directory to a stable, 1-based numeric id
    # and persist the mapping so ids can be decoded later.
    top_dirs = sorted({f.relative_to(surveys_path).parts[0] for f in csv_files})
    dir_to_id = {name: i + 1 for i, name in enumerate(top_dirs)}
    pd.DataFrame(list(dir_to_id.items()), columns=["top_dir", "id"]).to_csv(
        output_folder / "dir_mapping.csv", index=False)

    # LAS header template shared by every chunk and by the merged output.
    header = laspy.LasHeader(point_format=3, version="1.2")
    header.scales = (0.001, 0.001, 0.001)
    header.offsets = (0.0, 0.0, 0.0)

    temp_folder = output_folder / "tmp"
    temp_folder.mkdir(exist_ok=True)

    # Step 1: stream every CSV partition into size-capped LAS chunk files.
    print("Step 1: Writing LAS chunks...")
    file_counter = 1
    points_in_file = 0
    writer = None

    def open_new_chunk_file():
        # Close the current chunk (if any) and start the next numbered one.
        nonlocal file_counter, points_in_file, writer
        if writer:
            writer.close()
        las_path = temp_folder / f"{output_name}_chunk_{file_counter}.las"
        writer = laspy.open(str(las_path), mode="w", header=header)
        print(f"Writing chunk: {las_path}")
        points_in_file = 0
        file_counter += 1

    open_new_chunk_file()
    try:
        for f in csv_files:
            top_dir_id = dir_to_id[f.relative_to(surveys_path).parts[0]]
            # Read the CSV in Dask partitions so arbitrarily large files
            # never have to fit in memory at once.
            ddf = dd.read_csv(str(f), header=None, usecols=[0, 1, 2],
                              blocksize=chunk_size_bytes)

            for partition in ddf.to_delayed():
                df = partition.compute()
                x = df.iloc[:, 0].to_numpy()
                y = df.iloc[:, 1].to_numpy()
                z = df.iloc[:, 2].to_numpy()
                ids = np.full(len(df), top_dir_id, dtype=np.uint16)

                points = laspy.ScaleAwarePointRecord.zeros(len(df), header=header)
                # BUG FIX: write through the scaled accessors (lowercase
                # x/y/z) so the header's scale/offset is applied when the
                # real-world coordinates are converted to raw integers.
                # The original assigned to the raw .X/.Y/.Z fields, storing
                # the values unscaled (off by a factor of 1000 with a
                # 0.001 scale).
                points.x = x
                points.y = y
                points.z = z
                points.point_source_id = ids
                points.return_number[:] = 1
                points.number_of_returns[:] = 1

                # Spill across chunk files whenever the current one fills.
                # Opening the next chunk lazily (only when there are more
                # points to write) avoids leaving an empty trailing file.
                start_idx = 0
                while start_idx < len(points):
                    if points_in_file >= max_points_per_file:
                        open_new_chunk_file()
                    end_idx = start_idx + (max_points_per_file - points_in_file)
                    chunk = points[start_idx:end_idx]
                    writer.write_points(chunk)
                    points_in_file += len(chunk)
                    start_idx = end_idx
    finally:
        # Always release the last open chunk, even if a CSV fails mid-way.
        if writer:
            writer.close()

    # Step 2: concatenate the chunk files into the final LAS.
    print("Step 2: Merging LAS chunks into final LAS...")
    # BUG FIX: sort chunks by their numeric suffix; a plain lexicographic
    # sort would merge chunk_10 before chunk_2 once there are 10+ chunks.
    las_files = sorted(temp_folder.glob(f"{output_name}_chunk_*.las"),
                       key=lambda p: int(p.stem.rsplit("_", 1)[-1]))
    merged_header = laspy.read(str(las_files[0])).header

    with laspy.open(str(output_folder / output_name), mode="w",
                    header=merged_header) as merged_writer:
        for f in las_files:
            las = laspy.read(str(f))
            merged_writer.write_points(las.points)
            print(f"Merged {f.name} ({len(las.points)} points)")

    # Clean up the temporary chunk files and their folder.
    for f in las_files:
        f.unlink()
    temp_folder.rmdir()

    print(f"Final merged LAS saved at {output_folder / output_name}")
|
|
||
if __name__ == "__main__":
    # CLI guard: fail with a usage hint instead of an IndexError when the
    # two required positional arguments are missing.
    if len(sys.argv) != 3:
        sys.exit(f"Usage: {sys.argv[0]} <surveys_folder> <output_folder>")
    CSV_2_LAS(sys.argv[1], sys.argv[2])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| pandas | ||
| numpy | ||
| laspy | ||
| # pathlib removed: it is part of the Python standard library since 3.4; the PyPI "pathlib" backport is obsolete and can shadow the stdlib module | ||
| dask[dataframe] |