For this tutorial you will generate some sample (fake) measurement data so you can post it to your project.
Imports¶
import getpass
import itertools
from contextlib import suppress
from pathlib import Path
import gfhub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from gfhub import nodes
from PIL import Image
from tqdm.notebook import tqdm
# Fix NumPy's global RNG so every run of this notebook generates identical
# sample data.
np.random.seed(42)

# The current OS user; reused below to tag the uploaded files.
print(user := getpass.getuser())
runner
Client¶
Device Table¶
# Fetch the newest upload of the cutback device table and load it as a DataFrame.
newest_table_file = client.query_files(name="cutback_device_table.csv").newest()
device_table_id = newest_table_file.id
device_table = pd.read_csv(client.download_file(device_table_id))
device_table
| cell | x | y | components | |
|---|---|---|---|---|
| 0 | loss_2db | 33.835 | 116.310 | 816 |
| 1 | loss_1db | 529.485 | 116.310 | 400 |
| 2 | loss_0db | 516.910 | 714.861 | 16 |
Grating Coupler Response¶
We can simulate a coupler response as follows:
def gaussian_grating_coupler_response(
    peak_power, center_wavelength, bandwidth_1dB, wavelength
):
    """Calculate the response of a Gaussian grating coupler.

    Args:
        peak_power: The peak (linear) power of the response.
        center_wavelength: The center wavelength of the grating coupler.
        bandwidth_1dB: The 1 dB bandwidth of the coupler, in the same
            units as ``wavelength``.
        wavelength: The wavelength at which the response is evaluated;
            a scalar or a NumPy array (evaluated elementwise).

    Returns:
        The linear power of the grating coupler response at the given
        wavelength(s).
    """
    # Convert the 1 dB full bandwidth to the Gaussian standard deviation.
    # At an offset of bandwidth_1dB / 2 the power must be 1 dB below peak:
    #   exp(-0.5 * ((bw/2) / sigma)**2) == 10**(-1/10)
    #   => sigma = bw / (2 * sqrt(2 * ln(10) / 10))
    # The previous factor, 2 * sqrt(2 * ln(10)), put the response at
    # -10 dB (not -1 dB) at the band edges.
    sigma = bandwidth_1dB / (2 * np.sqrt(2 * np.log(10) / 10))
    # Gaussian response calculation
    return peak_power * np.exp(-0.5 * ((wavelength - center_wavelength) / sigma) ** 2)
Let's have a look at one such response:
# Example coupler: unit peak power, centered at 1.55 um, 100 nm 1 dB bandwidth.
peak_power = 1.0
center_wavelength = 1.550  # um
bandwidth_1dB = 0.100  # um

# Wavelength grid, +/- 50 nm around the center (reused further below when
# generating the fake measurement spectra).
wls = np.linspace(center_wavelength - 0.05, center_wavelength + 0.05, 150)

# NOTE(review): the column is labeled "power [dB]" but holds the model's
# linear power — confirm whether the demo plot is meant to show dB.
df = pd.DataFrame(
    {
        "wl [um]": wls,
        "power [dB]": gaussian_grating_coupler_response(
            peak_power, center_wavelength, bandwidth_1dB, wls
        ),
    }
)

# Quick visual sanity check of the simulated response.
plt.plot(df["wl [um]"], df["power [dB]"])
plt.title("Gaussian Grating Coupler Response")
plt.grid(True)
plt.xlabel("wl [um]")
plt.ylabel("power [dB]")
plt.show()

Function¶
We can create a plot_parquet function to plot two columns in a dataframe:
def plot_parquet(path: Path, /, *, x: str, y: str) -> Path:
    """Plot column *y* against column *x* of a parquet file and save a PNG.

    Args:
        path: Parquet file containing at least the columns ``x`` and ``y``.
        x: Column name used for the horizontal axis (and its label).
        y: Column name used for the vertical axis (and its label).

    Returns:
        Path of the PNG image written next to the input file (same stem,
        ``.png`` suffix).
    """
    df = pd.read_parquet(path)
    # Draw on a dedicated figure: this function runs repeatedly inside a
    # pipeline worker, and plotting via the implicit global figure would
    # accumulate lines across calls and leak figures.
    fig, ax = plt.subplots()
    ax.plot(df[x], df[y])
    ax.set_xlabel(x)
    ax.set_ylabel(y)
    outpath = path.with_suffix(".png")
    fig.savefig(outpath, bbox_inches="tight")
    plt.close(fig)  # release the figure's memory
    return outpath
# Wrap plot_parquet so gfhub can serialize it and run it remotely; the
# dependency map tells the worker what to install and how to import it.
plot_parquet_deps = {
    "pandas[pyarrow]": "import pandas as pd",
    "matplotlib": "import matplotlib.pyplot as plt",
}
func_def = gfhub.Function(plot_parquet, dependencies=plot_parquet_deps)

# Round-trip the demo dataframe through a local parquet file and evaluate
# the wrapped function on it.
temp_path = Path("temp.parquet").resolve()
df.to_parquet(temp_path)
result = func_def.eval(temp_path, x="wl [um]", y="power [dB]")
print(result)
Image.open(result["output"])
{'success': True, 'output': PosixPath('/home/runner/work/DataLab/DataLab/crates/sdk/examples/cutback/temp.png')}

{'id': '019bb95f-eeda-7951-a7b6-d391594a71f2',
'name': 'plot_parquet',
'parameters': {'x': {'type': 'str'}, 'y': {'type': 'str'}},
'inputs': {'path': {'type': 'Path'}},
'outputs': {'0': {'type': 'Path'}},
'created_at': '2026-01-13T22:00:30.682116Z',
'updated_at': '2026-01-14T10:24:18.307324Z'}
Pipeline¶
This function will still not run on its own. For this we need to create a pipeline. We have a convenient pipeline constructor method that will wrap a single function to run when a file is uploaded with a certain set of tags. This saves us some boilerplate, as we don't have to write it ourselves:
# Trigger plot_parquet whenever a file carrying all of these tags is uploaded.
trigger_tags = [".parquet", "project:cutback", user]
plot_kwargs = {"x": "wl [um]", "y": "power [dB]"}

pipeline = gfhub.Pipeline.on_file_upload(
    "plot_parquet",
    tags=trigger_tags,
    kwargs=plot_kwargs,
)
confirmation = client.add_pipeline("plot_parquet", pipeline)
print(client.pipeline_url(confirmation["id"]))
https://dpd.hub.gdsfactory.com/pipelines/019bbc08-ea12-7c41-b847-29d654e0c899
You can view the pipeline at the url above.
If anything does not look right, you can adjust the pipeline and go to the new URL for it.
If everything went well, the pipeline is now uploaded and active. Any uploaded .parquet file with the project:cutback tag (and your user tag) will automatically be processed to generate a plot for it.
Clean up (optional)¶
Let's delete any existing files from this project so you can start fresh.
# Delete this project's previously uploaded files so the run starts fresh,
# but keep the inputs uploaded in the previous notebook.
inputs_to_keep = ("cutback_device_table.csv", "cutback.gds")
existing_files = [
    f
    for f in client.query_files(tags=["project:cutback", user])
    if f["original_name"] not in inputs_to_keep
]
for f in tqdm(existing_files):
    # Best-effort: a file may already be gone or be deleted concurrently.
    with suppress(RuntimeError):
        client.delete_file(f["id"])
0%| | 0/361 [00:00<?, ?it/s]
Upload generated spectra¶
You can easily generate some spectrum data and add some noise to make it look like a real measurement.
wafer_id = "wafer1"
wafer_definitions = Path("wafer_definitions.json")
wafers = [wafer_id]

# 7x7 die grid (x, y in -3..3) with the four extreme corners left out.
# Equivalent to excluding abs(x) == abs(y) == 3; y varies slowest.
dies = [
    {"x": x, "y": y}
    for y, x in itertools.product(range(-3, 4), range(-3, 4))
    if abs(x) < 3 or abs(y) < 3
]

cwd = Path.cwd()

# Loss model (all in dB): a fixed per-grating-coupler loss plus a
# per-component loss; the noise scales are derived from the device loss.
grating_coupler_loss_dB = 3
device_loss_dB = 0.1
noise_peak_to_peak_dB = device_loss_dB / 10
device_loss_noise_dB = device_loss_dB / 10 * 2
# Generate one fake spectrum per (wafer, die, device-table row) and upload
# each as a tagged parquet file. Relies on the module-level model inputs
# defined above (wls, center_wavelength, bandwidth_1dB, loss constants).
# NOTE: the exact data depends on the order of np.random calls below, so
# statement order matters for reproducibility under the fixed seed.
for wafer, die, row in tqdm(
    list(itertools.product(wafers, dies, device_table.to_numpy()))
):
    # Rebind `die` from its {"x": ..., "y": ...} dict to the "x,y" string
    # used in the die tag.
    die = f"{(die['x'])},{(die['y'])}"
    cell, dev_x, dev_y, components = row
    device = f"{dev_x},{dev_y}"
    T = 25.0  # temperature tag value; constant for every generated spectrum
    # Total loss: two grating couplers plus a per-component loss with a
    # random per-device perturbation (one RNG draw per device).
    loss_dB = 2 * grating_coupler_loss_dB + components * (
        device_loss_dB + device_loss_noise_dB * np.random.rand()
    )
    # Convert the loss in dB to the linear peak power of the spectrum.
    peak_power = 10 ** (-loss_dB / 10)
    output_power = gaussian_grating_coupler_response(
        peak_power, center_wavelength, bandwidth_1dB, wls
    )
    output_power = np.array(output_power)
    # Per-sample multiplicative noise of at most noise_peak_to_peak_dB.
    output_power *= 10 ** (noise_peak_to_peak_dB * np.random.rand(wls.shape[0]) / 10)
    # Convert the noisy linear power to dB for storage.
    output_power = 10 * np.log10(output_power)
    df = pd.DataFrame(
        {
            "wl [um]": wls,
            "power [dB]": output_power,
        }
    )
    # Upload; these tags are what triggers the plot_parquet pipeline.
    client.add_file(
        df,
        tags=[
            user,
            "project:cutback",
            f"wafer:{wafer}",
            f"die:{die}",
            f"cell:{cell}",
            f"device:{device}",
            f"T:{T}",
            f"components:{components}",
        ],
        filename=f"cutback_device_{components}.parquet",
    )
0%| | 0/135 [00:00<?, ?it/s]