Column Profile Compilers

class dataprofiler.profilers.column_profile_compilers.BaseCompiler(df_series=None, options=None, pool=None)

Bases: object

abstract property profile
update_profile(df_series, pool=None)

Updates the profiles from the data frames

Parameters
  • df_series (pandas.core.series.Series) – a given column, assumed to contain str values

  • pool (multiprocessing.Pool) – pool to utilize for multiprocessing

Returns

Self

Return type

BaseCompiler

class dataprofiler.profilers.column_profile_compilers.ColumnPrimitiveTypeProfileCompiler(df_series=None, options=None, pool=None)

Bases: dataprofiler.profilers.column_profile_compilers.BaseCompiler

property profile
update_profile(df_series, pool=None)

Updates the profiles from the data frames

Parameters
  • df_series (pandas.core.series.Series) – a given column, assumed to contain str values

  • pool (multiprocessing.Pool) – pool to utilize for multiprocessing

Returns

Self

Return type

BaseCompiler

class dataprofiler.profilers.column_profile_compilers.ColumnStatsProfileCompiler(df_series=None, options=None, pool=None)

Bases: dataprofiler.profilers.column_profile_compilers.BaseCompiler

property profile
update_profile(df_series, pool=None)

Updates the profiles from the data frames

Parameters
  • df_series (pandas.core.series.Series) – a given column, assumed to contain str values

  • pool (multiprocessing.Pool) – pool to utilize for multiprocessing

Returns

Self

Return type

BaseCompiler

class dataprofiler.profilers.column_profile_compilers.ColumnDataLabelerCompiler(df_series=None, options=None, pool=None)

Bases: dataprofiler.profilers.column_profile_compilers.BaseCompiler

property profile
update_profile(df_series, pool=None)

Updates the profiles from the data frames

Parameters
  • df_series (pandas.core.series.Series) – a given column, assumed to contain str values

  • pool (multiprocessing.Pool) – pool to utilize for multiprocessing

Returns

Self

Return type

BaseCompiler

class dataprofiler.profilers.column_profile_compilers.UnstructuredCompiler(df_series=None, options=None, pool=None)

Bases: dataprofiler.profilers.column_profile_compilers.BaseCompiler

property profile
update_profile(df_series, pool=None)

Updates the profiles from the data frames

Parameters
  • df_series (pandas.core.series.Series) – a given column, assumed to contain str values

  • pool (multiprocessing.Pool) – pool to utilize for multiprocessing

Returns

Self

Return type

BaseCompiler