You can compare any dies or wafers with each other as another type of aggregated analysis.
As before, make sure you have the following environment variables set or added to a .env file:
GDSFACTORY_HUB_API_URL="https://{org}.gdsfactoryhub.com"
GDSFACTORY_HUB_QUERY_URL="https://query.{org}.gdsfactoryhub.com"
GDSFACTORY_HUB_KEY="<your-gdsfactoryplus-api-key>"
import getpass
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import gdsfactoryhub as gfh
# The project name embeds the local login name returned by getpass.getuser().
project_id = f"resistance-{getpass.getuser()}"
# Build the client for that project; per the note above, the GDSFACTORY_HUB_*
# environment variables (or a .env file) must be set beforehand.
client = gfh.create_client_from_env(project_id=project_id)
api = client.api()  # NOTE(review): not used in the visible part of this notebook
query = client.query()  # entry point used below to fetch analyses
# Map each wafer id to the die-level analyses produced by the
# "die_iv_sheet_resistance" function (one query per wafer).
die_analyses_per_wafer = {
    wafer_id: (
        query.analyses()
        .not_.is_("die", "null")  # skip analyses that are not attached to a die
        .eq("die.wafer.wafer_id", wafer_id)  # restrict to dies of the current wafer
        .eq("function.function_id", "die_iv_sheet_resistance")  # restrict to this analysis function
        .execute()
        .data
    )
    for wafer_id in wafer_ids
}
# Flatten the per-wafer results into one table: one row per die analysis,
# tagged with the wafer it was fetched for.
df = pd.DataFrame(
    data=[
        {
            "die_pkey": record["die"]["pk"],
            # an analysis without a "sheet_resistance" output is recorded as NaN
            "sheet_resistance": record["output"].get("sheet_resistance", np.nan),
            "wafer_id": wafer,
        }
        for wafer, records in die_analyses_per_wafer.items()
        for record in records
    ],
)
df.head()
| | die_pkey | sheet_resistance | wafer_id |
|---|---|---|---|
| 0 | c2f2c95f-20c6-447d-938e-02179ce4d5d4 | NaN | 6d4c615ff105 |
| 1 | 5af3d39f-cdaa-4c54-b3f1-08e274b089b6 | NaN | 6d4c615ff105 |
| 2 | 1898f9f5-bfef-4528-90f8-04c6abf41aa1 | 8.293516e+01 | 6d4c615ff105 |
| 3 | 91fc175a-2204-41b4-b528-1899ade4872a | -1.055094e+12 | 6d4c615ff105 |
| 4 | 3a12038a-b7ed-4b23-a4d1-ed3a163857b9 | 8.926339e+01 | 6d4c615ff105 |
def remove_outliers(
    df: pd.DataFrame,
    column_name: str = "sheet_resistance",
    iqr_factor: float = 1.5,
) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``column_name`` value lies inside the IQR fences.

    The fences are ``Q1 - iqr_factor * IQR`` and ``Q3 + iqr_factor * IQR``
    (both inclusive), where Q1/Q3 are the 25 % / 75 % quantiles of the column
    and ``IQR = Q3 - Q1``. The quantiles are computed over the whole column,
    i.e. over all rows of ``df`` together.

    Rows whose value is NaN never satisfy the range test and are therefore
    dropped as well. The input frame is not modified; the original index
    labels of the surviving rows are kept.

    Args:
        df: Table containing the column to filter on.
        column_name: Name of the numeric column used for the outlier test.
        iqr_factor: Width of the fences in units of the IQR. The default of
            1.5 reproduces the usual box-plot whisker rule.

    Returns:
        A new DataFrame holding only the rows inside the fences.

    Raises:
        KeyError: If ``column_name`` is not a column of ``df``.
        ValueError: If ``iqr_factor`` is negative.
    """
    if iqr_factor < 0:
        raise ValueError(f"iqr_factor must be non-negative, got {iqr_factor!r}")

    values = df[column_name]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    margin = iqr_factor * (q3 - q1)
    # between() is inclusive on both ends and evaluates to False for NaN.
    return df[values.between(q1 - margin, q3 + margin)]
# Keep only the dies whose sheet resistance lies within 1.5 * IQR of the
# quartiles; rows with a NaN sheet resistance are dropped as well.
filtered_df = remove_outliers(df)
| | die_pkey | sheet_resistance | wafer_id |
|---|---|---|---|
| 2 | 1898f9f5-bfef-4528-90f8-04c6abf41aa1 | 82.935164 | 6d4c615ff105 |
| 4 | 3a12038a-b7ed-4b23-a4d1-ed3a163857b9 | 89.263391 | 6d4c615ff105 |
| 5 | 594affdb-f3f1-4604-a097-50255a8893d0 | 88.175527 | 6d4c615ff105 |
| 6 | 5d71ad5c-1c76-4c46-9d6a-cb6a199b8191 | 92.697775 | 6d4c615ff105 |
| 7 | 3debdd16-762d-47eb-80e2-db02534fcb4f | 88.342298 | 6d4c615ff105 |
def _boxplot_sheet_resistance(data, title):
    """Show a box plot of the sheet resistance, one box per wafer.

    Args:
        data: DataFrame with "sheet_resistance" and "wafer_id" columns.
        title: Text shown as the axes title.
    """
    data.boxplot(column="sheet_resistance", by="wafer_id", grid=False)
    plt.title(title)
    plt.suptitle("")  # blank out the figure-level title so only `title` is shown
    plt.xlabel("Wafer ID")
    plt.ylabel("Sheet Resistance")
    # The box plot is drawn with grid=False; dashed grid lines are added by hand
    # for both axes instead.
    plt.grid(True, which="major", axis="y", color="gray", alpha=0.5, linestyle="--")
    plt.gca().xaxis.grid(True, color="gray", alpha=0.5, linestyle="--")
    plt.tight_layout()
    plt.show()


# Same plot for the raw and the filtered data, to make the effect of the
# outlier removal visible.
_boxplot_sheet_resistance(df, "before removing outliers")
_boxplot_sheet_resistance(filtered_df, "after removing outliers")
