For this tutorial you will generate some sample (fake) measurement data so you can post it to your project.
Imports¶
import getpass
import itertools
from contextlib import suppress
from pathlib import Path
import gfhub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gfhub import nodes
from PIL import Image
from tqdm.notebook import tqdm
# Fix NumPy's global RNG so every run of this notebook generates identical
# sample data.
np.random.seed(42)

# The current OS user; reused below to tag the uploaded files.
print(user := getpass.getuser())
runner
Client¶
Device Table¶
# Fetch the newest upload of the cutback device table and load it as a DataFrame.
newest_table_file = client.query_files(name="cutback_device_table.csv").newest()
device_table_id = newest_table_file.id
device_table = pd.read_csv(client.download_file(device_table_id))
device_table
| cell | x | y | components | |
|---|---|---|---|---|
| 0 | loss_2db | 33.835 | 116.310 | 816 |
| 1 | loss_1db | 529.485 | 116.310 | 400 |
| 2 | loss_0db | 516.910 | 714.861 | 16 |
Grating Coupler Response¶
We can simulate a coupler response as follows:
def gaussian_grating_coupler_response(
    peak_power, center_wavelength, bandwidth_1dB, wavelength
):
    """Calculate the response of a Gaussian grating coupler.

    Args:
        peak_power: The peak (linear) power of the response.
        center_wavelength: The center wavelength of the grating coupler.
        bandwidth_1dB: The 1 dB bandwidth of the coupler, in the same
            units as ``wavelength``.
        wavelength: The wavelength at which the response is evaluated;
            a scalar or a NumPy array (evaluated elementwise).

    Returns:
        The linear power of the grating coupler response at the given
        wavelength(s).
    """
    # Convert the 1 dB full bandwidth to the Gaussian standard deviation.
    # At an offset of bandwidth_1dB / 2 the power must be 1 dB below peak:
    #   exp(-0.5 * ((bw/2) / sigma)**2) == 10**(-1/10)
    #   => sigma = bw / (2 * sqrt(2 * ln(10) / 10))
    # The previous factor, 2 * sqrt(2 * ln(10)), put the response at
    # -10 dB (not -1 dB) at the band edges.
    sigma = bandwidth_1dB / (2 * np.sqrt(2 * np.log(10) / 10))
    # Gaussian response calculation
    return peak_power * np.exp(-0.5 * ((wavelength - center_wavelength) / sigma) ** 2)
Let's have a look at one such response:
# Example coupler: unit peak power, centered at 1.55 um, 100 nm 1 dB bandwidth.
peak_power = 1.0
center_wavelength = 1.550  # um
bandwidth_1dB = 0.100  # um

# Wavelength grid, +/- 50 nm around the center (reused further below when
# generating the fake measurement spectra).
wls = np.linspace(center_wavelength - 0.05, center_wavelength + 0.05, 150)

# NOTE(review): the column is labeled "power [dB]" but holds the model's
# linear power — confirm whether the demo plot is meant to show dB.
df = pd.DataFrame(
    {
        "wl [um]": wls,
        "power [dB]": gaussian_grating_coupler_response(
            peak_power, center_wavelength, bandwidth_1dB, wls
        ),
    }
)

# Quick visual sanity check of the simulated response.
plt.plot(df["wl [um]"], df["power [dB]"])
plt.title("Gaussian Grating Coupler Response")
plt.grid(True)
plt.xlabel("wl [um]")
plt.ylabel("power [dB]")
plt.show()

Function¶
We can create a plot_parquet function to plot two columns in a dataframe:
def plot_parquet(path: Path, /, *, x: str, y: str) -> Path:
    """Plot column *y* against column *x* of a parquet file and save a PNG.

    Args:
        path: Parquet file containing at least the columns ``x`` and ``y``.
        x: Column name used for the horizontal axis (and its label).
        y: Column name used for the vertical axis (and its label).

    Returns:
        Path of the PNG image written next to the input file (same stem,
        ``.png`` suffix).
    """
    df = pd.read_parquet(path)
    # Draw on a dedicated figure: this function runs repeatedly inside a
    # pipeline worker, and plotting via the implicit global figure would
    # accumulate lines across calls and leak figures.
    fig, ax = plt.subplots()
    ax.plot(df[x], df[y])
    ax.set_xlabel(x)
    ax.set_ylabel(y)
    outpath = path.with_suffix(".png")
    fig.savefig(outpath, bbox_inches="tight")
    plt.close(fig)  # release the figure's memory
    return outpath
# Wrap plot_parquet so gfhub can serialize it and run it remotely; the
# dependency map tells the worker what to install and how to import it.
plot_parquet_deps = {
    "pandas[pyarrow]": "import pandas as pd",
    "matplotlib": "import matplotlib.pyplot as plt",
}
func_def = gfhub.Function(plot_parquet, dependencies=plot_parquet_deps)

# Round-trip the demo dataframe through a local parquet file and evaluate
# the wrapped function on it.
temp_path = Path("temp.parquet").resolve()
df.to_parquet(temp_path)
result = func_def.eval(temp_path, x="wl [um]", y="power [dB]")
print(result)
Image.open(result["output"])
{'success': True, 'output': PosixPath('/home/runner/work/DataLab/DataLab/crates/sdk/examples/cutback/temp.png')}

{'id': '019bb95f-eeda-7951-a7b6-d391594a71f2',
'name': 'plot_parquet',
'parameters': {'x': {'type': 'str'}, 'y': {'type': 'str'}},
'inputs': {'path': {'type': 'Path'}},
'outputs': {'0': {'type': 'Path'}},
'created_at': '2026-01-13T22:00:30.682116Z',
'updated_at': '2026-01-14T10:24:18.307324Z'}
Pipeline¶
This function will still not run on its own. For this we need to create a pipeline. We have a convenient pipeline constructor method that will wrap a single function to run when a file is uploaded with a certain set of tags. This saves us some boilerplate, as we don't have to write it ourselves:
# Trigger plot_parquet whenever a file carrying all of these tags is uploaded.
trigger_tags = [".parquet", "project:cutback", user]
plot_kwargs = {"x": "wl [um]", "y": "power [dB]"}

pipeline = gfhub.Pipeline.on_file_upload(
    "plot_parquet",
    tags=trigger_tags,
    kwargs=plot_kwargs,
)
confirmation = client.add_pipeline("plot_parquet", pipeline)
print(client.pipeline_url(confirmation["id"]))
https://dpd.hub.gdsfactory.com/pipelines/019bbc08-ea12-7c41-b847-29d654e0c899
You can view the pipeline at the url above.
If anything does not look right, you can adjust the pipeline and go to the new URL for it.
If everything went well, the pipeline is now uploaded and active. Any uploaded .parquet file with the project:cutback tag (and your user tag) will automatically be processed to generate a plot for it.
Clean up (optional)¶
Let's delete any existing files from this project so you can start fresh.
# Delete this project's previously uploaded files so the run starts fresh,
# but keep the inputs uploaded in the previous notebook.
inputs_to_keep = ("cutback_device_table.csv", "cutback.gds")
existing_files = [
    f
    for f in client.query_files(tags=["project:cutback", user])
    if f["original_name"] not in inputs_to_keep
]
for f in tqdm(existing_files):
    # Best-effort: a file may already be gone or be deleted concurrently.
    with suppress(RuntimeError):
        client.delete_file(f["id"])
0%| | 0/361 [00:00<?, ?it/s]
Upload generated spectra¶
You can easily generate some spectrum data and add some noise to make it look like a real measurement.
wafer_id = "wafer1"
wafer_definitions = Path("wafer_definitions.json")
wafers = [wafer_id]

# 7x7 die grid (x, y in -3..3) with the four extreme corners left out.
# Equivalent to excluding abs(x) == abs(y) == 3; y varies slowest.
dies = [
    {"x": x, "y": y}
    for y, x in itertools.product(range(-3, 4), range(-3, 4))
    if abs(x) < 3 or abs(y) < 3
]

cwd = Path.cwd()

# Loss model (all in dB): a fixed per-grating-coupler loss plus a
# per-component loss; the noise scales are derived from the device loss.
grating_coupler_loss_dB = 3
device_loss_dB = 0.1
noise_peak_to_peak_dB = device_loss_dB / 10
device_loss_noise_dB = device_loss_dB / 10 * 2
# Generate one fake spectrum per (wafer, die, device-table row) and upload
# each as a tagged parquet file. Relies on the module-level model inputs
# defined above (wls, center_wavelength, bandwidth_1dB, loss constants).
# NOTE: the exact data depends on the order of np.random calls below, so
# statement order matters for reproducibility under the fixed seed.
for wafer, die, row in tqdm(
    list(itertools.product(wafers, dies, device_table.to_numpy()))
):
    # Rebind `die` from its {"x": ..., "y": ...} dict to the "x,y" string
    # used in the die tag.
    die = f"{(die['x'])},{(die['y'])}"
    cell, dev_x, dev_y, components = row
    device = f"{dev_x},{dev_y}"
    T = 25.0  # temperature tag value; constant for every generated spectrum
    # Total loss: two grating couplers plus a per-component loss with a
    # random per-device perturbation (one RNG draw per device).
    loss_dB = 2 * grating_coupler_loss_dB + components * (
        device_loss_dB + device_loss_noise_dB * np.random.rand()
    )
    # Convert the loss in dB to the linear peak power of the spectrum.
    peak_power = 10 ** (-loss_dB / 10)
    output_power = gaussian_grating_coupler_response(
        peak_power, center_wavelength, bandwidth_1dB, wls
    )
    output_power = np.array(output_power)
    # Per-sample multiplicative noise of at most noise_peak_to_peak_dB.
    output_power *= 10 ** (noise_peak_to_peak_dB * np.random.rand(wls.shape[0]) / 10)
    # Convert the noisy linear power to dB for storage.
    output_power = 10 * np.log10(output_power)
    df = pd.DataFrame(
        {
            "wl [um]": wls,
            "power [dB]": output_power,
        }
    )
    # Upload; these tags are what triggers the plot_parquet pipeline.
    client.add_file(
        df,
        tags=[
            user,
            "project:cutback",
            f"wafer:{wafer}",
            f"die:{die}",
            f"cell:{cell}",
            f"device:{device}",
            f"T:{T}",
            f"components:{components}",
        ],
        filename=f"cutback_device_{components}.parquet",
    )
0%| | 0/135 [00:00<?, ?it/s]