For this tutorial you will generate some sample (fake) measurement data so you can post it to your project.

Imports

import getpass
import itertools
from contextlib import suppress
from pathlib import Path

import gfhub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gfhub import nodes
from PIL import Image
from tqdm.notebook import tqdm

# Seed NumPy's global random generator so that every run of this notebook
# produces the same fake measurement data.
np.random.seed(42)  # always generate the same data.
# Login name of the current user; it is used further down as a tag on the
# pipeline, on file queries, and on every uploaded file.
user = getpass.getuser()
print(user)
runner

Client

# Client object used for every query, download, upload, and deletion below.
# NOTE(review): presumably picks up the server URL and credentials from local
# configuration, since no arguments are passed -- confirm.
client = gfhub.Client()

Device Table

# Look up the files named "cutback_device_table.csv" and keep the id of the one
# returned by .newest() (presumably the most recent upload -- confirm).
device_table_id = client.query_files(name="cutback_device_table.csv").newest().id
# Download that file and parse it as CSV. Each row is unpacked later in this
# notebook as (cell, x, y, components).
device_table = pd.read_csv(client.download_file(device_table_id))
# Bare expression: shown as the cell output in the notebook.
device_table
cell x y components
0 loss_2db 33.835 116.310 816
1 loss_1db 529.485 116.310 400
2 loss_0db 516.910 714.861 16

Grating Coupler Response

We can simulate a coupler response as follows:

def gaussian_grating_coupler_response(
    peak_power, center_wavelength, bandwidth_1dB, wavelength
):
    """Calculate the response of a Gaussian grating coupler.

    The response is a Gaussian in wavelength, centered on
    ``center_wavelength``, whose full width between the two points lying
    1 dB below the peak equals ``bandwidth_1dB``.

    Args:
        peak_power: The peak (linear) power of the response.
        center_wavelength: The center wavelength of the grating coupler.
        bandwidth_1dB: The 1 dB bandwidth of the coupler, in the same unit as
            the wavelengths.
        wavelength: The wavelength (scalar or NumPy array) at which the
            response is evaluated.

    Returns:
        The linear power of the grating coupler response at the given
        wavelength(s); same shape as ``wavelength``.

    """
    # Convert the 1 dB bandwidth to the Gaussian standard deviation (sigma).
    # At an offset of bandwidth_1dB / 2 the response must be 1 dB down, i.e.
    #   exp(-0.5 * (bw / (2 * sigma)) ** 2) == 10 ** (-1 / 10)
    #   => sigma = bw / (2 * sqrt(2 * 0.1 * ln(10)))
    # (Using ln(10) without the 0.1 factor would describe the 10 dB bandwidth.)
    sigma = bandwidth_1dB / (2 * np.sqrt(2 * 0.1 * np.log(10)))

    # Gaussian response calculation
    return peak_power * np.exp(-0.5 * ((wavelength - center_wavelength) / sigma) ** 2)

Let's have a look at one such response:

peak_power = 1.0
center_wavelength = 1.550  # um
bandwidth_1dB = 0.100  # um

# 150 wavelength samples spanning +/- 0.05 um around the center wavelength.
# `wls` is reused later in this notebook when generating the fake spectra.
wls = np.linspace(center_wavelength - 0.05, center_wavelength + 0.05, 150)

df = pd.DataFrame(
    {
        "wl [um]": wls,
        # The response function returns linear power; convert it to dB so the
        # values actually match the "power [dB]" column / axis label.
        "power [dB]": 10
        * np.log10(
            gaussian_grating_coupler_response(
                peak_power, center_wavelength, bandwidth_1dB, wls
            )
        ),
    }
)

plt.plot(df["wl [um]"], df["power [dB]"])
plt.title("Gaussian Grating Coupler Response")
plt.grid(True)
plt.xlabel("wl [um]")
plt.ylabel("power [dB]")
plt.show()

png

Function

We can create a plot_parquet function to plot two columns in a dataframe:

def plot_parquet(path: Path, /, *, x: str, y: str) -> Path:
    """Plot column ``y`` against column ``x`` of a parquet file and save a PNG.

    Args:
        path: Parquet file to read.
        x: Name of the column plotted on the horizontal axis.
        y: Name of the column plotted on the vertical axis.

    Returns:
        Path of the written image: ``path`` with its suffix replaced by
        ``.png``.
    """
    frame = pd.read_parquet(path)
    # Draw on a dedicated figure rather than pyplot's implicit current figure:
    # otherwise repeated calls in the same process keep adding curves to the
    # same axes and never release the figure.
    fig, ax = plt.subplots()
    try:
        ax.plot(frame[x], frame[y])
        ax.set_xlabel(x)
        ax.set_ylabel(y)
        outpath = path.with_suffix(".png")
        fig.savefig(outpath, bbox_inches="tight")
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)
    return outpath
# Wrap plot_parquet in a gfhub.Function. `dependencies` maps each package the
# function needs to the import statement that brings it into scope
# (presumably installed/executed wherever the function runs -- confirm).
func_def = gfhub.Function(
    plot_parquet,
    dependencies={
        "pandas[pyarrow]": "import pandas as pd",
        "matplotlib": "import matplotlib.pyplot as plt",
    },
)
# Try the wrapped function on the example response: write the DataFrame to a
# parquet file and evaluate the function on it with the two column names.
temp_path = Path("temp.parquet").resolve()
df.to_parquet(temp_path)
result = func_def.eval(temp_path, x="wl [um]", y="power [dB]")
print(result)
# Last expression of the cell: opens the image at result["output"] so the
# notebook displays it.
Image.open(result["output"])
{'success': True, 'output': PosixPath('/home/runner/work/DataLab/DataLab/crates/sdk/examples/cutback/temp.png')}

png

# Upload the wrapped function through the client; the pipeline created below
# refers to it by the name "plot_parquet".
client.add_function(func_def)
{'id': '019bb95f-eeda-7951-a7b6-d391594a71f2',
 'name': 'plot_parquet',
 'parameters': {'x': {'type': 'str'}, 'y': {'type': 'str'}},
 'inputs': {'path': {'type': 'Path'}},
 'outputs': {'0': {'type': 'Path'}},
 'created_at': '2026-01-13T22:00:30.682116Z',
 'updated_at': '2026-01-14T10:24:18.307324Z'}

Pipeline

This function will still not run on its own. For this we need to create a pipeline. We have a convenient pipeline constructor method that wraps a single function so that it runs whenever a file is uploaded with a certain set of tags. This saves us the boilerplate of having to write this ourselves:

# Build a pipeline that runs the "plot_parquet" function on file upload for
# files matching these tags (parquet extension, this project, this user),
# passing the column names to plot as keyword arguments.
pipeline = gfhub.Pipeline.on_file_upload(
    "plot_parquet",
    tags=[".parquet", "project:cutback", user],
    kwargs={"x": "wl [um]", "y": "power [dB]"},
)
# Register the pipeline under the name "plot_parquet" and print the URL where
# it can be inspected; the returned confirmation carries the pipeline id.
confirmation = client.add_pipeline("plot_parquet", pipeline)
print(client.pipeline_url(confirmation["id"]))
https://dpd.hub.gdsfactory.com/pipelines/019bbc08-ea12-7c41-b847-29d654e0c899

You can view the pipeline at the url above.

If anything does not look right, you can adjust the pipeline and go to the new URL for it.

If everything went well, the pipeline is now uploaded and active. Any uploaded .parquet file tagged with project:cutback and your user name will automatically be processed to generate a plot for it.

Clean up (optional)

Let's delete any existing files from this project so you can start fresh.

# Files uploaded in the previous notebook; these must survive the cleanup.
keep_names = ("cutback_device_table.csv", "cutback.gds")

# Every other file tagged with this project and this user is scheduled for
# deletion.
existing_files = [
    f
    for f in client.query_files(tags=["project:cutback", user])
    if f["original_name"] not in keep_names
]

# Best-effort removal: a RuntimeError on one file does not stop the rest.
for file in tqdm(existing_files):
    with suppress(RuntimeError):
        client.delete_file(file["id"])
  0%|          | 0/361 [00:00<?, ?it/s]

Upload generated spectra

You can easily generate some spectrum data and add some noise to make it look like a real measurement.

# Wafer / die layout.
wafer_id = "wafer1"
wafer_definitions = Path("wafer_definitions.json")
wafers = [wafer_id]
# 7 x 7 grid of die coordinates (y varies slowest) without the four corners.
dies = [
    {"x": x, "y": y}
    for y, x in itertools.product(range(-3, 4), repeat=2)
    if (abs(x), abs(y)) != (3, 3)
]
cwd = Path.cwd()

# Loss model parameters (all in dB).
grating_coupler_loss_dB = 3
device_loss_dB = 0.1
noise_peak_to_peak_dB = device_loss_dB / 10
device_loss_noise_dB = device_loss_dB / 10 * 2

# One fake measurement per (wafer, die, device-table row) combination; the
# list is materialized so the progress bar knows the total.
measurements = list(itertools.product(wafers, dies, device_table.to_numpy()))
for wafer, die_xy, row in tqdm(measurements):
    die = f"{die_xy['x']},{die_xy['y']}"
    cell, dev_x, dev_y, components = row
    device = f"{dev_x},{dev_y}"
    T = 25.0  # temperature

    # Total loss: two grating couplers plus `components` devices, each with a
    # randomly perturbed loss (one scalar random draw per measurement).
    per_component_loss_dB = device_loss_dB + device_loss_noise_dB * np.random.rand()
    loss_dB = 2 * grating_coupler_loss_dB + components * per_component_loss_dB
    peak_power = 10 ** (-loss_dB / 10)

    # Linear spectrum, then per-sample multiplicative noise, then dB scale.
    output_power = np.array(
        gaussian_grating_coupler_response(
            peak_power, center_wavelength, bandwidth_1dB, wls
        )
    )
    ripple_dB = noise_peak_to_peak_dB * np.random.rand(wls.shape[0])
    output_power *= 10 ** (ripple_dB / 10)
    output_power = 10 * np.log10(output_power)

    df = pd.DataFrame({"wl [um]": wls, "power [dB]": output_power})

    # Metadata tags attached to the uploaded spectrum.
    tags = [
        user,
        "project:cutback",
        f"wafer:{wafer}",
        f"die:{die}",
        f"cell:{cell}",
        f"device:{device}",
        f"T:{T}",
        f"components:{components}",
    ]
    client.add_file(
        df,
        tags=tags,
        filename=f"cutback_device_{components}.parquet",
    )
  0%|          | 0/135 [00:00<?, ?it/s]