# -*- coding: utf-8 -*-
"""
This file provides functions that are used to manage the contents of a data slot
Zoltán Cseresnyés, Ruman Gerst
Research Group Applied Systems Biology - Head: Prof. Dr. Marc Thilo Figge
https://www.leibniz-hki.de/en/applied-systems-biology.html
HKI-Center for Systems Biology of Infection
Leibniz Institute for Natural Product Research and Infection Biology - Hans Knöll Institute (HKI)
Adolf-Reichwein-Straße 23, 07745 Jena, Germany
The project code is licensed under BSD 2-Clause.
See the LICENSE file provided with the code for the full license.
"""
from os import makedirs
from os.path import isdir
from pathlib import Path
import json
import shutil
[docs]
class DataSlot:
    """
    Models a JIPipe data slot.

    A slot is a table of rows. Every row owns a numbered sub-directory inside
    ``storage_path`` and carries text annotations, data annotations and the
    true data type ID of the stored item. The per-row lists
    (``text_annotations``, ``data_annotations``, ``true_data_types``) are kept
    in lockstep with ``rows``.
    """

    def __init__(self, data_type: str, storage_path: Path, node_id="", internal_path: Path = "", name: str = "",
                 slot_type: str = ""):
        """
        Initializes a new data slot
        :param data_type: the JIPipe data type ID that is accepted in this slot. The most basic type is 'jipipe:data'
        :param slot_type: the type of the slot. Allowed values are 'input' and 'output'. Can be empty.
        :param storage_path: a directory where the slot data is stored. If it does not exist, one will be created
        :param node_id: Metadata that indicates the node associated to this slot. Can be empty.
        :param internal_path: Metadata that indicates the placement of this data within a hierarchy of data. Can be empty.
        :param name: Name of the slot. Can be empty.
        """
        self.data_type = data_type
        self.slot_type = slot_type
        self.storage_path = storage_path
        self.node_id = node_id
        self.internal_path = internal_path
        self.name = name
        # Number of rows; row indices run from 0 to rows - 1
        self.rows = 0
        # One dict (annotation name -> value) per row
        self.text_annotations = []
        # One dict (annotation name -> data annotation description) per row
        self.data_annotations = []
        # One data type ID per row
        self.true_data_types = []
        # exist_ok avoids the race between checking for and creating the directory
        makedirs(storage_path, exist_ok=True)

    def get_row_storage_path(self, row: int):
        """
        Returns the storage path for the provided row
        :param row: the row index
        :return: the storage path for the row (a sub-directory of the slot storage path named after the index)
        """
        return Path(self.storage_path) / str(row)

    def copy_row(self, source_data_slot, source_row: int):
        """
        Copies data from the source slot into this slot.
        A new row is appended to this slot; it receives copies of the source row's annotations
        and a copy of the contents of the source row's storage directory.
        :param source_data_slot: the source slot
        :param source_row: the source row
        """
        # add_row stores shallow copies of the annotation dicts it is given
        target_row = self.add_row(text_annotations=source_data_slot.text_annotations[source_row],
                                  data_annotations=source_data_slot.data_annotations[source_row],
                                  true_data_type=source_data_slot.true_data_types[source_row])
        source_row_storage_path = source_data_slot.get_row_storage_path(source_row)
        target_row_storage_path = self.get_row_storage_path(target_row)
        print("Copy " + str(source_row_storage_path) + " to " + str(target_row_storage_path))
        # The target directory was already created by add_row, hence dirs_exist_ok
        shutil.copytree(src=source_row_storage_path,
                        dst=target_row_storage_path,
                        dirs_exist_ok=True)

    def add_row(self, n: int = 1, text_annotations: dict = None, data_annotations: dict = None, true_data_type: str = None):
        """
        Adds n rows into the slot and creates their storage directories
        :param true_data_type: the true data type ID of this entry. If none, the slot data type is used
        :param text_annotations: annotations to set for these rows. Each row receives its own shallow copy
        :param data_annotations: annotations to set for these rows. Each row receives its own shallow copy
        :param n: how many rows to add
        :return: the last row index
        """
        if text_annotations is None:
            text_annotations = {}
        if data_annotations is None:
            data_annotations = {}
        if true_data_type is None:
            true_data_type = self.data_type
        for _ in range(n):
            new_row = self.rows
            self.rows += 1
            self.true_data_types.append(true_data_type)
            self.text_annotations.append(text_annotations.copy())
            self.data_annotations.append(data_annotations.copy())
            makedirs(self.get_row_storage_path(new_row), exist_ok=True)
        return self.rows - 1

    def to_table(self):
        """
        Converts the data slot into a Pandas table. The format is equivalent to data-table.csv generated by JIPipe.
        Text annotations become columns named after the annotation; data annotations become
        columns prefixed with '$' that contain the string representation of the annotation.
        Annotation columns appear in the order in which their names are first encountered.
        Requires pandas to be installed.
        :return: Pandas table
        """
        row_count = self.rows
        csv_data = {
            "jipipe:node-id": [self.node_id] * row_count,
            "jipipe:slot": [self.name] * row_count,
            "jipipe:data-type": [self.data_type] * row_count,
            "jipipe:true-data-type": self.true_data_types[:row_count],
            "jipipe:index": list(range(row_count))
        }

        # dict.fromkeys de-duplicates while keeping first-seen order, so the column order is deterministic
        text_annotation_columns = dict.fromkeys(
            name for annotations in self.text_annotations for name in annotations)
        data_annotation_columns = dict.fromkeys(
            name for annotations in self.data_annotations for name in annotations)

        for annotation_column in text_annotation_columns:
            csv_data[annotation_column] = [self.text_annotations[row].get(annotation_column, "")
                                           for row in range(row_count)]
        for annotation_column in data_annotation_columns:
            csv_data["$" + annotation_column] = [str(self.data_annotations[row].get(annotation_column, ""))
                                                 for row in range(row_count)]

        # Imported lazily so that pandas is only needed when a table is requested
        from pandas import DataFrame
        return DataFrame(data=csv_data)

    def to_dict(self):
        """
        Converts the data slot into a dict that can be serialized into JSON
        :return: a dictionary that describes this slot
        """
        rows = []
        for row in range(self.rows):
            rows.append({
                "index": row,
                "true-data-type": self.true_data_types[row],
                "text-annotations": [{
                    "name": name,
                    "value": value
                } for name, value in self.text_annotations[row].items()],
                "data-annotations": [{
                    "name": name,
                    "true-data-type": value["true-data-type"],
                    "row-storage-folder": value["row-storage-folder"]
                } for name, value in self.data_annotations[row].items()]
            })
        return {
            "node-id": self.node_id,
            "slot": self.name,
            # A Path object is not JSON-serializable, so the path is stored as a string
            "internal-path": str(self.internal_path),
            "data-type": self.data_type,
            "rows": rows
        }

    def save(self, with_csv=False):
        """
        Saves all metadata related to this slot in the storage folder.
        This will overwrite data-table.json and data-table.csv (if enabled)
        :param with_csv: Also write the table as CSV. The format is equivalent to the one generated by JIPipe. Writing with CSV requires pandas to be installed.
        :return: None
        """
        # Wrap in Path so that a storage path provided as a string works as well
        storage_path = Path(self.storage_path)
        with open(storage_path / "data-table.json", "w", encoding="utf-8") as f:
            json.dump(self.to_dict(), f, indent=4, ensure_ascii=False)
        if with_csv:
            self.to_table().to_csv(storage_path / "data-table.csv")
[docs]
def import_from_folder(storage_path: Path):
    """
    Imports a data slot from a storage path.
    A valid storage path contains 'data-table.json' and multiple numeric folders.
    Rows are added in ascending order of their 'index' entry. Row directories that
    do not exist yet are created as a side effect of adding the rows.
    :param storage_path: the storage path
    :return: a DataSlot instance
    """
    with open(Path(storage_path) / "data-table.json", "r", encoding="utf-8") as f:
        data_table = json.load(f)

    data_slot = DataSlot(data_type=data_table["data-type"],
                         storage_path=storage_path,
                         slot_type="",
                         internal_path=storage_path,
                         node_id=data_table.get("node-id", ""),
                         name=data_table.get("slot", ""))

    for row in sorted(data_table.get("rows", []), key=lambda entry: entry["index"]):
        text_annotations = {annotation["name"]: annotation["value"]
                            for annotation in row.get("text-annotations", [])}
        # Data annotations are kept as their full description dicts (they go into
        # data_annotations, not text_annotations), which is the shape DataSlot.to_dict reads back
        data_annotations = {annotation["name"]: annotation
                            for annotation in row.get("data-annotations", [])}
        # A missing true data type makes add_row fall back to the slot data type
        data_slot.add_row(text_annotations=text_annotations,
                          data_annotations=data_annotations,
                          true_data_type=row.get("true-data-type"))
    return data_slot