analysis.py
Analysis (Node)
¶
Analysis object to interact with Celonis Studio API.
Source code in celonis_api/studio/analysis.py
class Analysis(Node):
    """Analysis object to interact with Celonis Studio API.

    Most public members delegate to the `StudioAnalysis` instance stored in
    `self.analysis`; the private helpers implement a JSON backup / restore of
    the draft document and the saved formulas.
    """

    def __init__(self, parent, celonis, id_or_data, **kw):
        # NOTE: extra keyword arguments are accepted but not forwarded or used here.
        super().__init__(parent, celonis, id_or_data)
        self.analysis = StudioAnalysis(celonis, self.id)

    @property
    def datamodel(self) -> 'Datamodel':
        """Get the Datamodel connected to the Analysis."""
        return self.analysis.datamodel

    def get_data_frame(self, pql_query: 'PQL', **kwargs) -> pd.DataFrame:
        """Exports the results of a PQL query as
        [pyarrow.parquet.ParquetFile](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetFile.html)
        and converts it to a [pandas.DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html).
        Uses [Analysis.get_data_frame][celonis_api.process_analytics.analysis.Analysis.get_data_frame].

        Args:
            pql_query: The table query to be executed.
            kwargs: Optional keyword arguments passed to `Analysis.get_data_frame`

        Returns:
            Dataframe containing the results of the query.
        """
        return self.analysis.get_data_frame(pql_query, **kwargs)

    def get_data_file(
        self,
        pql_query: 'PQL',
        file_path: typing.Optional[typing.Union[str, pathlib.Path]] = None,
        export_type: str = "PARQUET",
        variables: typing.Optional[typing.List[typing.Dict]] = None,
    ) -> pathlib.Path:
        """Exports the results of a PQL query as
        [pyarrow.parquet.ParquetFile](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetFile.html)
        and returns the path to the exported file.
        Uses [Analysis.get_data_file][celonis_api.process_analytics.analysis.Analysis.get_data_file].

        Args:
            pql_query: The table query to be executed.
            file_path: The output path for the export. Defaults to `tmpdir/celonis_pql_export_<current_time>.parquet`.
            export_type: Export filetype. One of [`PARQUET`, `EXCEL`, `CSV`].
            variables: Only needed when PQL query contains saved formulas that contain variables.

        Returns:
            Path to downloaded file containing the results of the query.
        """
        return self.analysis.get_data_file(pql_query, file_path, export_type, variables)

    @property
    def draft(self) -> 'BaseDraftDocument':
        """Get/Set the Base Draft Document of the Analysis.
        Uses [Analysis.draft][celonis_api.process_analytics.analysis.BaseAnalysis.draft].
        """
        return self.analysis.draft

    @draft.setter
    def draft(self, value: 'BaseDraftDocument'):
        self.analysis.draft = value

    @property
    def published(self) -> 'BasePublishedDocument':
        """Get/Set the Base Published Document of the Analysis.
        Uses [Analysis.published][celonis_api.process_analytics.analysis.BaseAnalysis.published].
        """
        return self.analysis.published

    @published.setter
    def published(self, value: 'BasePublishedDocument'):
        self.analysis.published = value

    @property
    def saved_formulas(self) -> 'CelonisCollection[BaseAnalysisSavedFormula]':
        """Get all saved formulas of the Analysis.
        Uses [Analysis.saved_formulas][celonis_api.process_analytics.analysis.BaseAnalysis.saved_formulas].
        """
        return self.analysis.saved_formulas

    def create_saved_formula(
        self,
        name: str,
        description: str = "",
        template: str = "",
        parameters: typing.Optional[typing.List[str]] = None,
        **kwargs,
    ) -> 'BaseAnalysisSavedFormula':
        """Creates a new Saved Formula.
        Uses [Analysis.create_saved_formula][celonis_api.process_analytics.analysis.BaseAnalysis.create_saved_formula].

        A missing (or empty) `parameters` argument is forwarded as an empty list.
        """
        parameters = parameters or []
        return self.analysis.create_saved_formula(name, description, template, parameters, **kwargs)

    @property
    def images(self) -> 'CelonisCollection[BaseAnalysisImage]':
        """Get all images of the Analysis.
        Uses [Analysis.images][celonis_api.process_analytics.analysis.BaseAnalysis.images].
        """
        return self.analysis.images

    def _backup_draft_content(self, backup_path: typing.Union[str, pathlib.Path] = ".") -> pathlib.Path:
        """Make backup of analysis draft in JSON format.

        Images are not backed up; a warning is logged when the analysis has any.

        Parameters
        ----------
        backup_path : str or pathlib.Path, default "."
            The path in which the backup folder will be created.

        Returns
        -------
        pathlib.Path
            Path to the backup folder.
        """
        path = self._prepare_backup_directory(backup_path)
        files = self._create_backup_file_content()
        self._write_backup_files(files, path)
        if len(self.images) > 0:
            self._logger.warning("Image backup currently not supported!")
        return path

    def _prepare_backup_directory(self, backup_path) -> pathlib.Path:
        """Prepares backup directory and removes any previously existing analyses"""
        path = pathlib.Path(backup_path) / f"Backup of Analysis - {utils.pathify(self.name)}"
        if path.exists():
            shutil.rmtree(path)
        # parents=True: a not-yet-existing `backup_path` is created instead of
        # failing with FileNotFoundError.
        path.mkdir(parents=True)
        return path

    def _write_backup_files(self, files: typing.Dict, path: pathlib.Path):
        """Writes files to backup folder

        Each key of `files` is used as a file name inside `path`, each value is
        serialized as pretty-printed JSON with sorted keys.
        """
        for file_name, content in files.items():
            # Explicit encoding so backups do not depend on the platform default.
            (path / file_name).write_text(json.dumps(content, sort_keys=True, indent=2), encoding="utf-8")

    def _create_backup_file_content(self) -> typing.Dict:
        """Creates content for backup files

        Returns a mapping of file name to JSON-serializable content: the saved
        formulas, the analysis name, the draft data and, when the draft has a
        non-empty document, its variables and one entry per sheet.
        """
        files = {"saved_formulas.json": self.analysis._saved_formula_data, "name.json": self.name}
        data = self.draft.data
        document = data["document"]
        if document:
            # Work on shallow copies: splitting variables/sheets out of the
            # document must not alter the data held by the live draft.
            document = dict(document)
            files["draft_variables.json"] = document.pop("variables", [])
            for n, sheet in enumerate(document.pop("components", [])):
                files[f"draft_sheet_{n:02}.json"] = sheet
            data = {**data, "document": document}
        files["draft.json"] = data
        return files

    def _rebuild_draft_content_from_backup(self, backup_path: typing.Union[str, pathlib.Path]):
        """Overwrites the contents of this analysis with content from a backup folder.

        Parameters
        ----------
        backup_path : str or pathlib.Path
            Path to the backup folder.

        Raises
        ------
        AssertionError
            If `backup_path` is not an existing directory.
        """
        path = pathlib.Path(backup_path)
        if not path.is_dir():
            # Raised explicitly instead of using `assert` so the check is not
            # stripped under `python -O`; exception type and message are kept.
            raise AssertionError("backup_path must be directory")
        self._upload_formulas(path)
        self._upload_component_and_variables(path)

    def _upload_component_and_variables(self, path: pathlib.Path):
        """Uploads analysis components and variables

        Rebuilds the draft document from `draft.json`, `draft_variables.json`
        and the `draft_sheet_*.json` files found in `path`. Components that
        carry an `imageId` are dropped from every sheet.
        """
        draft_file = path / "draft.json"
        if not draft_file.is_file():
            self._logger.warning(f"Draft document of analyses not found in Backup folder '{path}'")
            return

        doc = self.draft
        data = json.loads(draft_file.read_text(encoding="utf-8"))
        document = data["document"]
        if not document:
            # A backup of a draft with an empty document contains neither
            # variables nor sheet files, so there is nothing to rebuild.
            self._logger.warning(f"Draft document in Backup folder '{path}' is empty, nothing to restore")
            return

        document["variables"] = json.loads((path / "draft_variables.json").read_text(encoding="utf-8"))
        document["components"] = []
        # Sheet files are numbered with two digits, so sorting restores their order.
        for sheet_file in sorted(path.glob("draft_sheet_*.json")):
            comp_data = json.loads(sheet_file.read_text(encoding="utf-8"))
            if "components" in comp_data:
                comp_data["components"] = [comp for comp in comp_data["components"] if "imageId" not in comp]
            document["components"].append(comp_data)
        doc.data["document"] = document

    def _upload_formulas(self, path: pathlib.Path):
        """Upload analysis formulas

        Reads `saved_formulas.json` from `path`, deletes all saved formulas of
        the analysis and re-creates them from the backup.
        """
        # Read the backup first so a missing/invalid file fails before anything is deleted.
        formulas = json.loads((path / "saved_formulas.json").read_text(encoding="utf-8"))
        for saved_formula in self.saved_formulas:
            saved_formula.delete()
        for formula in formulas:
            self.create_saved_formula(**formula)
datamodel: Datamodel
property
readonly
¶
Get the Datamodel connected to the Analysis.
draft: BaseDraftDocument
property
writable
¶
Get/Set the Base Draft Document of the Analysis. Uses Analysis.draft.
images: CelonisCollection[BaseAnalysisImage]
property
readonly
¶
Get all images of the Analysis. Uses Analysis.images.
published: BasePublishedDocument
property
writable
¶
Get/Set the Base Published Document of the Analysis. Uses Analysis.published.
saved_formulas: CelonisCollection[BaseAnalysisSavedFormula]
property
readonly
¶
Get all saved formulas of the Analysis. Uses Analysis.saved_formulas.
create_saved_formula(self, name, description='', template='', parameters=None, **kwargs)
¶
Creates a new Saved Formula. Uses Analysis.create_saved_formula.
Source code in celonis_api/studio/analysis.py
def create_saved_formula(
    self, name: str, description: str = "", template: str = "", parameters: typing.List[str] = None, **kwargs
) -> 'BaseAnalysisSavedFormula':
    """Create a new Saved Formula by delegating to the wrapped analysis.

    Uses [Analysis.create_saved_formula][celonis_api.process_analytics.analysis.BaseAnalysis.create_saved_formula].
    A falsy `parameters` value is replaced by an empty list before the call.
    """
    if not parameters:
        parameters = []
    formula = self.analysis.create_saved_formula(name, description, template, parameters, **kwargs)
    return formula
get_data_file(self, pql_query, file_path=None, export_type='PARQUET', variables=None)
¶
Exports the results of a PQL query as pyarrow.parquet.ParquetFile and returns the path to the exported file. Uses Analysis.get_data_file.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pql_query |
PQL |
The table query to be executed. |
required |
file_path |
Union[str, pathlib.Path] |
The output path for the export. Defaults to `tmpdir/celonis_pql_export_<current_time>.parquet`. |
None |
export_type |
str |
Export filetype. One of [`PARQUET`, `EXCEL`, `CSV`]. |
'PARQUET' |
variables |
List[Dict] |
Only needed when PQL query contains saved formulas that contain variables. |
None |
Returns:
Type | Description |
---|---|
Path |
Path to downloaded file containing the results of the query. |
Source code in celonis_api/studio/analysis.py
def get_data_file(
    self,
    pql_query: 'PQL',
    file_path: typing.Union[str, pathlib.Path] = None,
    export_type: str = "PARQUET",
    variables: typing.List[typing.Dict] = None,
) -> pathlib.Path:
    """Export the results of a PQL query to a file and return the file's path.

    The export format defaults to
    [pyarrow.parquet.ParquetFile](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetFile.html).
    Uses [Analysis.get_data_file][celonis_api.process_analytics.analysis.Analysis.get_data_file].

    Args:
        pql_query: The table query to be executed.
        file_path: The output path for the export. Defaults to `tmpdir/celonis_pql_export_<current_time>.parquet`.
        export_type: Export filetype. One of [`PARQUET`, `EXCEL`, `CSV`].
        variables: Only needed when PQL query contains saved formulas that contain variables.

    Returns:
        Path to downloaded file containing the results of the query.
    """
    # All arguments are forwarded positionally, in declaration order.
    export_args = (pql_query, file_path, export_type, variables)
    exported_path = self.analysis.get_data_file(*export_args)
    return exported_path
get_data_frame(self, pql_query, **kwargs)
¶
Exports the results of a PQL query as pyarrow.parquet.ParquetFile and converts it to a pandas.DataFrame. Uses Analysis.get_data_frame.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
pql_query |
PQL |
The table query to be executed. |
required |
kwargs |
Optional keyword arguments passed to `Analysis.get_data_frame` |
{} |
Returns:
Type | Description |
---|---|
DataFrame |
Dataframe containing the results of the query. |
Source code in celonis_api/studio/analysis.py
def get_data_frame(self, pql_query: 'PQL', **kwargs) -> pd.DataFrame:
    """Run a PQL query and return its results as a DataFrame.

    The results are exported as
    [pyarrow.parquet.ParquetFile](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetFile.html)
    and converted to a [pandas.DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html).
    Uses [Analysis.get_data_frame][celonis_api.process_analytics.analysis.Analysis.get_data_frame].

    Args:
        pql_query: The table query to be executed.
        kwargs: Optional keyword arguments passed to `Analysis.get_data_frame`

    Returns:
        Dataframe containing the results of the query.
    """
    wrapped_analysis = self.analysis
    frame = wrapped_analysis.get_data_frame(pql_query, **kwargs)
    return frame