Skip to content

analysis.py

Analysis (Node)

Analysis object to interact with Celonis Studio API.

Source code in celonis_api/studio/analysis.py
class Analysis(Node):
    """Analysis object to interact with Celonis Studio API.

    Most functionality is delegated to the `StudioAnalysis` instance stored in
    `self.analysis`; this class additionally offers private helpers to back up
    the draft document to a folder of JSON files and to restore it from there.
    """

    def __init__(self, parent, celonis, id_or_data, **kw):
        # NOTE: extra keyword arguments (`kw`) are accepted but not forwarded.
        super().__init__(parent, celonis, id_or_data)

        # Wrapped object that the properties and methods below delegate to.
        self.analysis = StudioAnalysis(celonis, self.id)

    @property
    def datamodel(self) -> 'Datamodel':
        """Get the Datamodel connected to the Analysis."""
        return self.analysis.datamodel

    def get_data_frame(self, pql_query: 'PQL', **kwargs) -> pd.DataFrame:
        """Exports the results of a PQL query as
        [pyarrow.parquet.ParquetFile](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetFile.html)
        and converts it to a [pandas.DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html).
        Uses [Analysis.get_data_frame][celonis_api.process_analytics.analysis.Analysis.get_data_frame].

        Args:
            pql_query: The table query to be executed.
            kwargs: Optional keyword arguments passed to `Analysis.get_data_frame`

        Returns:
            Dataframe containing the results of the query.
        """
        return self.analysis.get_data_frame(pql_query, **kwargs)

    def get_data_file(
        self,
        pql_query: 'PQL',
        file_path: typing.Union[str, pathlib.Path] = None,
        export_type: str = "PARQUET",
        variables: typing.List[typing.Dict] = None,
    ) -> pathlib.Path:
        """Exports the results of a PQL query as
        [pyarrow.parquet.ParquetFile](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetFile.html)
        and returns the path to the exported file.
        Uses [Analysis.get_data_file][celonis_api.process_analytics.analysis.Analysis.get_data_file].

        Args:
            pql_query: The table query to be executed.
            file_path: The output path for the export. Defaults to `tmpdir/celonis_pql_export_<current_time>.parquet`.
            export_type: Export filetype. One of [`PARQUET`, `EXCEL`, `CSV`].
            variables: Only needed when PQL query contains saved formulas that contain variables.

        Returns:
            Path to downloaded file containing the results of the query.
        """
        return self.analysis.get_data_file(pql_query, file_path, export_type, variables)

    @property
    def draft(self) -> 'BaseDraftDocument':
        """Get/Set the Base Draft Document of the Analysis.
        Uses [Analysis.draft][celonis_api.process_analytics.analysis.BaseAnalysis.draft].
        """
        return self.analysis.draft

    @draft.setter
    def draft(self, value: 'BaseDraftDocument'):
        self.analysis.draft = value

    @property
    def published(self) -> 'BasePublishedDocument':
        """Get/Set the Base Published Document of the Analysis.
        Uses [Analysis.published][celonis_api.process_analytics.analysis.BaseAnalysis.published].
        """
        return self.analysis.published

    @published.setter
    def published(self, value: 'BasePublishedDocument'):
        self.analysis.published = value

    @property
    def saved_formulas(self) -> 'CelonisCollection[BaseAnalysisSavedFormula]':
        """Get all saved formulas of the Analysis.
        Uses [Analysis.saved_formulas][celonis_api.process_analytics.analysis.BaseAnalysis.saved_formulas].
        """
        return self.analysis.saved_formulas

    def create_saved_formula(
        self, name: str, description: str = "", template: str = "", parameters: typing.List[str] = None, **kwargs
    ) -> 'BaseAnalysisSavedFormula':
        """Creates a new Saved Formula.
        Uses [Analysis.create_saved_formula][celonis_api.process_analytics.analysis.BaseAnalysis.create_saved_formula].
        """
        parameters = parameters or []
        return self.analysis.create_saved_formula(name, description, template, parameters, **kwargs)

    @property
    def images(self) -> 'CelonisCollection[BaseAnalysisImage]':
        """Get all images of the Analysis.
        Uses [Analysis.images][celonis_api.process_analytics.analysis.BaseAnalysis.images].
        """
        return self.analysis.images

    def _backup_draft_content(self, backup_path: typing.Union[str, pathlib.Path] = ".") -> pathlib.Path:
        """Make backup of analysis draft in JSON format.

        Images are not part of the backup; a warning is logged if the analysis has any.

        Args:
            backup_path: The path in which the backup folder will be created.

        Returns:
            Path to the backup folder.
        """
        path = self._prepare_backup_directory(backup_path)
        files = self._create_backup_file_content()
        self._write_backup_files(files, path)

        if len(self.images) > 0:
            self._logger.warning("Image backup currently not supported!")
        return path

    def _prepare_backup_directory(self, backup_path) -> pathlib.Path:
        """Prepares an empty backup directory, removing a previous backup of this analysis.

        Args:
            backup_path: The path in which the backup folder will be created.

        Returns:
            Path to the freshly created (empty) backup folder.
        """
        path = pathlib.Path(backup_path) / f"Backup of Analysis - {utils.pathify(self.name)}"
        if path.exists():
            shutil.rmtree(path)
        # parents=True so that a not-yet-existing `backup_path` does not make the backup fail.
        path.mkdir(parents=True)
        return path

    def _write_backup_files(self, files: typing.Dict, path: pathlib.Path):
        """Writes files to backup folder.

        Args:
            files: Mapping of file name to JSON-serializable content.
            path: Existing folder the files are written into.
        """
        for file_name, content in files.items():
            serialized = json.dumps(content, sort_keys=True, indent=2)
            # Explicit encoding keeps the backup independent of the platform's locale.
            (path / file_name).write_text(serialized, encoding="utf-8")

    def _create_backup_file_content(self) -> typing.Dict:
        """Creates content for backup files.

        The draft document is split up: variables and each sheet (entry of
        `components`) go to their own file, the remainder goes to `draft.json`.

        Returns:
            Mapping of backup file name to JSON-serializable content.
        """
        files = {"saved_formulas.json": self.analysis._saved_formula_data, "name.json": self.name}

        data = self.draft.data
        document = data["document"]
        if document:
            files["draft_variables.json"] = document.get("variables", [])
            for n, sheet in enumerate(document.get("components", [])):
                files[f"draft_sheet_{n:02}.json"] = sheet
            # Build stripped copies instead of popping keys, so that creating a
            # backup does not remove variables/components from the draft's data.
            stripped_document = {k: v for k, v in document.items() if k not in ("variables", "components")}
            files["draft.json"] = {**data, "document": stripped_document}
        return files

    def _rebuild_draft_content_from_backup(self, backup_path: typing.Union[str, pathlib.Path]):
        """Overwrites the contents of this analysis with content from a backup folder.

        Args:
            backup_path: Path to the backup folder.

        Raises:
            AssertionError: If `backup_path` is not a directory.
        """
        path = pathlib.Path(backup_path)
        if not path.is_dir():
            # Raised explicitly (instead of via `assert`) so the check also runs under
            # `python -O`; the exception type is kept for backward compatibility.
            raise AssertionError("backup_path must be directory")

        self._upload_formulas(path)
        self._upload_component_and_variables(path)

    def _upload_component_and_variables(self, path: pathlib.Path):
        """Uploads analysis components and variables.

        Reassembles the draft document from `draft.json`, `draft_variables.json` and
        the `draft_sheet_*.json` files (in sorted file-name order) and assigns it to
        the draft's data. Components carrying an `imageId` are dropped.
        Logs a warning and does nothing if `draft.json` is missing.

        Args:
            path: Path to the backup folder.
        """
        draft_file = path / "draft.json"
        if draft_file.is_file():
            doc = self.draft
            data = json.loads(draft_file.read_text(encoding="utf-8"))
            data["document"]["variables"] = json.loads((path / "draft_variables.json").read_text(encoding="utf-8"))
            data["document"]["components"] = []
            for sheet_file in sorted(path.glob("draft_sheet_*.json")):
                comp_data = json.loads(sheet_file.read_text(encoding="utf-8"))
                # A sheet without a "components" key is passed through unchanged instead of raising KeyError.
                if "components" in comp_data:
                    comp_data["components"] = [comp for comp in comp_data["components"] if "imageId" not in comp]
                data["document"]["components"].append(comp_data)
            doc.data["document"] = data["document"]
        else:
            self._logger.warning(f"Draft document of analyses not found in Backup folder '{path}'")

    def _upload_formulas(self, path: pathlib.Path):
        """Upload analysis formulas.

        Deletes all saved formulas of the analysis and recreates them from
        `saved_formulas.json`. The file is parsed before anything is deleted.

        Args:
            path: Path to the backup folder.
        """
        formulas = json.loads((path / "saved_formulas.json").read_text(encoding="utf-8"))
        # Iterate over a snapshot so deleting cannot disturb the iteration.
        for saved_formula in list(self.saved_formulas):
            saved_formula.delete()
        for formula in formulas:
            self.create_saved_formula(**formula)

datamodel: Datamodel property readonly

Get the Datamodel connected to the Analysis.

draft: BaseDraftDocument property writable

Get/Set the Base Draft Document of the Analysis. Uses Analysis.draft.

images: CelonisCollection[BaseAnalysisImage] property readonly

Get all images of the Analysis. Uses Analysis.images.

published: BasePublishedDocument property writable

Get/Set the Base Published Document of the Analysis. Uses Analysis.published.

saved_formulas: CelonisCollection[BaseAnalysisSavedFormula] property readonly

Get all saved formulas of the Analysis. Uses Analysis.saved_formulas.

create_saved_formula(self, name, description='', template='', parameters=None, **kwargs)

Creates a new Saved Formula. Uses Analysis.create_saved_formula.

Source code in celonis_api/studio/analysis.py
def create_saved_formula(
    self, name: str, description: str = "", template: str = "", parameters: typing.List[str] = None, **kwargs
) -> 'BaseAnalysisSavedFormula':
    """Create a new Saved Formula by delegating to the wrapped analysis.
    Uses [Analysis.create_saved_formula][celonis_api.process_analytics.analysis.BaseAnalysis.create_saved_formula].

    A missing or empty `parameters` value is forwarded as an empty list;
    additional keyword arguments are passed through unchanged.
    """
    if not parameters:
        parameters = []
    saved_formula = self.analysis.create_saved_formula(name, description, template, parameters, **kwargs)
    return saved_formula

get_data_file(self, pql_query, file_path=None, export_type='PARQUET', variables=None)

Exports the results of a PQL query to a file (a pyarrow.parquet.ParquetFile by default; see export_type) and returns the path to the exported file. Uses Analysis.get_data_file.

Parameters:

Name Type Description Default
pql_query PQL

The table query to be executed.

required
file_path Union[str, pathlib.Path]

The output path for the export. Defaults to tmpdir/celonis_pql_export_<current_time>.parquet.

None
export_type str

Export filetype. One of [PARQUET, EXCEL, CSV].

'PARQUET'
variables List[Dict]

Only needed when PQL query contains saved formulas that contain variables.

None

Returns:

Type Description
Path

Path to downloaded file containing the results of the query.

Source code in celonis_api/studio/analysis.py
def get_data_file(
    self,
    pql_query: 'PQL',
    file_path: typing.Union[str, pathlib.Path] = None,
    export_type: str = "PARQUET",
    variables: typing.List[typing.Dict] = None,
) -> pathlib.Path:
    """Export the results of a PQL query to a file and return the file's path.
    With the default `export_type` the file is a
    [pyarrow.parquet.ParquetFile](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetFile.html).
    Delegates to [Analysis.get_data_file][celonis_api.process_analytics.analysis.Analysis.get_data_file].

    Args:
        pql_query: The table query to be executed.
        file_path: The output path for the export. Defaults to `tmpdir/celonis_pql_export_<current_time>.parquet`.
        export_type: Export filetype. One of [`PARQUET`, `EXCEL`, `CSV`].
        variables: Only needed when PQL query contains saved formulas that contain variables.

    Returns:
        Path to downloaded file containing the results of the query.
    """
    # Arguments are forwarded positionally, in the order of this signature.
    exported_file = self.analysis.get_data_file(pql_query, file_path, export_type, variables)
    return exported_file

get_data_frame(self, pql_query, **kwargs)

Exports the results of a PQL query as pyarrow.parquet.ParquetFile and converts it to a pandas.DataFrame. Uses Analysis.get_data_frame.

Parameters:

Name Type Description Default
pql_query PQL

The table query to be executed.

required
kwargs

Optional keyword arguments passed to Analysis.get_data_frame

{}

Returns:

Type Description
DataFrame

Dataframe containing the results of the query.

Source code in celonis_api/studio/analysis.py
def get_data_frame(self, pql_query: 'PQL', **kwargs) -> pd.DataFrame:
    """Run a PQL query and return its results as a
    [pandas.DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html).
    The results are exported as
    [pyarrow.parquet.ParquetFile](https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetFile.html)
    and then converted.
    Delegates to [Analysis.get_data_frame][celonis_api.process_analytics.analysis.Analysis.get_data_frame].

    Args:
        pql_query: The table query to be executed.
        kwargs: Optional keyword arguments passed to `Analysis.get_data_frame`

    Returns:
        Dataframe containing the results of the query.
    """
    frame = self.analysis.get_data_frame(pql_query, **kwargs)
    return frame