Source code for sequential.seq2pat

# -*- coding: utf-8 -*-
# SPDX-License-Identifier: Apache-2.0
import gc
import numpy as np
from typing import NamedTuple, List, Dict, NoReturn, Optional, Tuple
from multiprocessing import Process, Queue
from copy import deepcopy
from joblib import Parallel, delayed

from sequential.backend import seq_to_pat as stp
from sequential.utils import Num, check_true, get_max_column_size, \
    get_min_value, get_max_value, sort_pattern, item_map, \
    string_to_int, int_to_string, check_sequence_feature_same_length, \
    validate_attribute_values, validate_sequences, validate_max_span, \
    aggregate_patterns, validate_min_frequency, validate_batch_args, update_min_frequency


# IMPORTANT: Constant values should not be changed
# These represent parameters in C++ backend that need to be set by matching exact names
class _Constants:
    # List where values correspond to the value of upper gap constraints on an attribute, and
    # whose id can be found in ugapi at the same index
    ugap = 'ugap'

    # List where values are attribute ids of attributes that have upper gap constraints, and
    # where the value of the constraint can be found in ugap at the same index
    ugapi = 'ugapi'

    # List where values correspond to the value of lower gap constraints on an attribute, and
    # whose id can be found in lgapi at the same index
    lgap = 'lgap'

    # List where values values are attribute ids of attributes that have lower gap constraints, and
    # where the value of the constraint can be found in lgap at the same index
    lgapi = 'lgapi'

    # List where values correspond to the value of lower average constraints on an attribute, and
    # whose id can be found in lavri at the same index
    lavr = 'lavr'

    # List where values values are attribute ids of attributes that have lower average constraints, and
    # where the value of the constraint can be found in lavr at the same index
    lavri = 'lavri'

    # List where values correspond to the value of upper average constraints on an attribute, and
    # whose id can be found in uavri at the same index
    uavr = 'uavr'

    # List where values values are attribute ids of attributes that have  upper average constraints, and
    # where the value of the constraint can be found in uavr at the same index
    uavri = 'uavri'

    # List where values correspond to the value of lower span constraints on an attribute, and
    # whose id can be found in lspni at the same index
    lspn = 'lspn'

    # List where values values are attribute ids of attributes that have  lower span constraints, and
    # where the value of the constraint can be found in lspn at the same index
    lspni = 'lspni'

    # List where values correspond to the value of lower span constraints on an attribute, and
    # whose id can be found in uspni at the same index
    uspn = 'uspn'

    # List where values values are attribute ids of attributes that have upper span constraints, and
    # where the value of the constraint can be found in uspn at the same index
    uspni = 'uspni'

    # List where values correspond to the value of lower median constraints on an attribute, and
    # whose id can be found in lmedi at the same index
    lmed = 'lmed'

    # List where values correspond to the value of upper median constraints on an attribute, and
    # whose id can be found in umedi at the same index
    umed = 'umed'

    # List where values values are attribute ids of attributes that have  upper median constraints, and
    # where the value of the constraint can be found in umed at the same index
    umedi = 'umedi'

    # List where values values are attribute ids of attributes that have  upper median constraints, and
    # where the value of the constraint can be found in lmed at the same index
    lmedi = 'lmedi'

    # List where the indexes correspond to the attribute ids and values correspond to the number of
    # upper span constraints on that attribute
    num_minmax = 'num_minmax'

    # List where the indexes correspond to the attribute ids and the values correspond to the number of
    # average constraints on that attribute
    num_avr = 'num_avr'

    # List where the indexes correspond to the attribute ids and values correspond to the number of
    # median constraints on that attribute
    num_med = 'num_med'

    # List where values are attribute ids of attribute that have gap constraints
    tot_gap = 'tot_gap'

    # List where values are attribute ids of attribute that have span constraints
    tot_spn = 'tot_spn'

    # List where values are attribute ids of attribute that have average constraints
    tot_avr = 'tot_avr'

    # Number of attributes that constraint are enforced on
    num_att = 'num_att'

    # List of sequences that will be mined for sequences
    items = 'items'

    # List of attributes that constraints will be imposed on during mining
    attrs = 'attrs'

    # Length of the largest sequence in items
    M = 'M'

    # Number of sequences in items
    N = 'N'

    # If integer sequences, the largest value, if string sequences the number of events
    L = 'L'

    # List where index correspond to attribute ids and values to the maximum value for that attribute
    max_attrs = 'max_attrs'

    # List where index correspond to the attribute ids and values to the minimum value for that attribute
    min_attrs = 'min_attrs'

    # The data size threshold to dynamically decide if batch processing is needed to ease mining task
    dynamic_batch_threshold = 500000

    # If batch_size is not set while the dataset has more than dynamic_batch_threshold sequences,
    # batch_size is set to be default_batch_size, to apply batch processing
    default_batch_size = 10000

    # Default seed to define a random state
    default_seed = 123456


[docs]class Attribute: def __init__(self, values: List[list]): """Attribute with given values. Attributes ---------- values: List[list] A list of lists corresponding to the values of each event. """ # Validate input values validate_attribute_values(values) self._values = values self._max = get_max_value(values) self._min = get_min_value(values) @property def values(self): """Values The values of the attribute """ return self._values def _set_values(self, values): """Set values of attribute This function is used when Seq2Pat runs on batches of sequences. In each batch, the attributes and constraints need to be reset to be consistent to the sequences. Since the upper and lower bound are set on the entire set, it would be easier to copy the existing constraints but only reset the attribute values in a batch. It would be prevented to set values directly, thus we add the function to reset the attribute values. """ self._values = values
[docs] def average(self): """The Average Constraint Restricts the average value of a pattern. """ return _Constraint.Average(self)
[docs] def gap(self): """The Gap Constraint Restricts the difference between every two consecutive event values in a pattern. """ return _Constraint.Gap(self)
[docs] def median(self): """The Median Constraint Restricts the median value of a pattern. """ return _Constraint.Median(self)
[docs] def span(self): """The Span Constraint Restricts the difference between the maximum and the minimum value in a pattern. """ return _Constraint.Span(self)
class _BaseConstraint: def __init__(self, attribute: Attribute): self._attribute = attribute self._lower_bound = None self._upper_bound = None @property def attribute(self): return self._attribute @property def lower_bound(self): return self._lower_bound @property def upper_bound(self): return self._upper_bound def has_lower_bound(self): return self.lower_bound is not None def has_upper_bound(self): return self.upper_bound is not None def check_satisfaction(self, value): # Initialize returned results. When there are no constraints, result is explicitly set to be true. res = True if self.has_upper_bound(): if value > self.upper_bound: return False if self.has_lower_bound(): if value < self.lower_bound: return False return res def __le__(self, other): self._upper_bound = other return self def __ge__(self, other): self._lower_bound = other return self class _Constraint(NamedTuple): class Average(_BaseConstraint): def __init__(self, attribute: Attribute): super().__init__(attribute) class Gap(_BaseConstraint): def __init__(self, attribute: Attribute): super().__init__(attribute) def check_satisfaction(self, value): # Initialize returned results to be true. res = True if self.has_upper_bound(): if max(value) > self.upper_bound: return False if self.has_lower_bound(): if min(value) < self.lower_bound: return False return res class Median(_BaseConstraint): def __init__(self, attribute: Attribute): super().__init__(attribute) class Span(_BaseConstraint): def __init__(self, attribute: Attribute): super().__init__(attribute)
[docs]class Seq2Pat: """**Seq2Pat: Sequence-to-Pattern Generation Library** Attributes ---------- sequences: List[list] A list of sequences each with a list of events. The event values can be all strings or all integers. max_span: Optional[int] The value for applying a built-in maximum span constraint to the length of items in mining, max_span=10 by default (10 items). This is going to avoid regular users to run into a scaling issue when data contains long sequences but no constraints are used to run the mining efficiently and practically. Power users can choose to drop this constraint by setting it to be None or increase the maximum span as the system has resources to support. batch_size: Optional[int] The batch_size parameter is set to be None by default, then a mining task runs on the entire data set using a single thread. When batch_size is set, Seq2Pat runs on batches of sequences instead for improving scalability. Each batch contains `batch_size` sequences as a **random** sample of entire set. This is achieved by shuffling the entire set uniformly before we sequentially split the sequences into batches. A mining task will run on each batch with a reduced minimum row count (min_frequency) threshold. Please refer to description of discount_factor parameter for how min_frequency is reduced. Resulted patterns will be aggregated from the mining results of each batch by calculating the sum of the occurrences. Finally the original minimum row count threshold is applied to the patterns after aggregation. When batch_size is None but the dataset has more than _Constants.dynamic_batch_threshold sequences, batch_size is dynamically set to be _Constants.default_seed to ease the mining task on the large dataset by default. Power users can define specific batch_size, discount_factor and n_jobs for gaining more runtime benefit. discount_factor: float A discount factor is used to reduce the minimum row count (min_frequency) threshold when Seq2Pat is applied on a batch. The new threshold for a batch is defined to be max(min_frequency * discount_factor, 1.0/batch_size), where an integer min_frequency will be converted to a ratio first by min_frequency/number_total_sequences. Final results will be based on the aggregation of patterns from each batch by calculating the sum of the occurrences. Theoretically there is a chance that the batching results will be different from non-batching results. But a small discount_factor parameter will make the chance to be minimal and thus we have the same results as running on entire set in practices. A small value of discount_factor is thus recommended. discount_factor=0.2 by default. n_jobs: int n_jobs defines the number of processes (n_jobs=2 by default) that are used when mining tasks are applied on batches in parallel. If -1 all CPUs are used. If -2, all CPUs but one are used. seed: int Random seed to make sequences uniformly distributed among batches. Note ---- For power users who have interests to learn more about the designed batch processing behavior, an Experimental\ Results Summary in the following would be useful. - We have experimental analysis for batch_size vs. discount_factor vs. runtime tested on a data set with \ 100k sequences. - The results show that when batch_size increases, e.g. from 10000 to 100000, we observe an increase in runtime,\ while the mined patterns are all the same as mining on the entire set using single thread. - When batch_size=10000, we get the most runtime benefit compared to running on entire set. - On the same 100k sequences, we set batch_size=10000 and change discount_factor from 0.1 to 1.0. We observe\ that the runtime decreases as discount_factor increases. Only when discount_factor=1.0, the batching mode will\ miss some patterns compared to running on entire set. We would recommend discount_factor=0.2 by default for\ the robustness in results, at the expenses of runtime. - In an even larger test on ~1M sequences, we set batch_size=10000, discount_factor=0.8, n_jobs=8.\ Batch mode saves 60% of the runtime compared to running on entire set, while the resulted patterns from the\ two processes are the same. - When data size is small, e.g., a few thousand sequences, there is no benefit to run batch mode.\ Thus, we would recommend using the batch mode only when data has at least hundreds of thousands of sequences\ for gaining the runtime benefit. """ def __init__(self, sequences: List[list], max_span: Optional[int] = 10, batch_size=None, discount_factor=0.2, n_jobs=2, seed=_Constants.default_seed): # Validate input validate_sequences(sequences) validate_max_span(max_span) validate_batch_args(batch_size, discount_factor, n_jobs, seed) # Input sequences self._sequences: List[list] = sequences # Sequences as strings or integers self._is_string = isinstance(sequences[0][0], str) # If string, internal mapping to between strings and integers if self._is_string: self._str_to_int, self._int_to_str = item_map(self.sequences) self._sequences = string_to_int(self._str_to_int, self.sequences) # Set size variables self._num_rows = len(self.sequences) self._max_num_columns = get_max_column_size(self.sequences) self._max_value = get_max_value(self.sequences) # Constraint store: attribute_id -> constraint_name -> Constraint self.attr_to_cts: Dict[Attribute, Dict[str, _Constraint]] = dict() # Cython implementor object self._cython_imp = None if max_span: # Create index attribute index_attr = Attribute([[i for i in range(len(seq))] for seq in sequences]) # Add built-in maximum span constraint on index. # The minimum span is at least 1 between two indices. Here we add it explicitly. # Given max_span items, the maximum difference on the index is (max_span - 1) self.add_constraint(1 <= index_attr.span() <= (max_span - 1)) # Dynamically set batch_size to ease the mining task on a large dataset by default # If batch_size is None and num_rows > dynamic_batch_threshold, set batch_size to apply batch processing # For dataset having num_rows <= dynamic_batch_threshold, mining task runs on entire set if batch_size=None # Power users can define specific batch_size, discount_factor and n_jobs for gaining more runtime benefit if not batch_size and self._num_rows > _Constants.dynamic_batch_threshold: self.batch_size = _Constants.default_batch_size else: self.batch_size = batch_size self.discount_factor = discount_factor self.n_jobs = n_jobs self.seed = seed # Set an internal rng object self._rng = np.random.default_rng(self.seed) @property def sequences(self) -> List[List]: """Sequence The sequences of Seq2Pat. """ return self._sequences
[docs] def add_constraint(self, constraint: _BaseConstraint) -> _BaseConstraint: """Adds the given constraint to the constraint store. Parameters ---------- constraint: _BaseConstraint A constraint on an attribute object Returns ------- The constraint handle. Raises ------ TypeError: If the constraint is already defined on this attribute. ValueError: If there is a mismatch in length of sequences and their attributes. """ # Attribute and constraint id attr_id = constraint.attribute ct_id = constraint.__class__.__name__ # Create the attribute field, if not created already if attr_id not in self.attr_to_cts: self.attr_to_cts[attr_id] = dict() # If the same constraint type exists on this attribute already, raise error if ct_id in self.attr_to_cts[attr_id]: raise TypeError(ct_id + " constraint is already defined on this attribute.") # Verify that attribute check_true(check_sequence_feature_same_length(self.sequences, constraint.attribute.values), ValueError("Each sequence should match given attributes in event length.")) # Add the constraint self.attr_to_cts[attr_id][ct_id] = constraint # Return constraint handle (so that one can remove it) return constraint
[docs] def remove_constraint(self, constraint: _BaseConstraint) -> NoReturn: """Removes the given constraint from the constraint store. Parameters ---------- constraint: _BaseConstraint A constraint on an attribute object Raises ------ KeyError: If the given constraint does not exist in the constraint store. """ # Attribute and constraint id attribute_id = constraint.attribute constraint_id = constraint.__class__.__name__ try: # Remove the constraint from the attribute del self.attr_to_cts[attribute_id][constraint_id] # If no constraint left on the attribute, remove the attribute if len(self.attr_to_cts[attribute_id]) == 0: del self.attr_to_cts[attribute_id] except KeyError: raise KeyError("No " + constraint_id + " constraint to remove on this attribute.")
def _run_thread(self, min_frequency, q): # Cython implementor object with input parameters set self._cython_imp = self._get_cython_imp(min_frequency) # Frequent mining q.put(self._cython_imp.mine())
[docs] def get_patterns(self, min_frequency: Num) -> List[list]: """Performs the mining operation enforcing the constraints and returns the most frequent patterns. Parameters ---------- min_frequency: Num If int, represents the minimum number of sequences (rows) a pattern should occur. If float, should be (0.0, 1.0] and represents the minimum percentage of sequences (rows) a pattern should occur. Returns ------- Each inner list represents a frequent pattern in the form\ [event_1, event_2, event_3, ... event_n, frequency].\ The last element is the frequency of the pattern.\ Sequences are sored by decreasing frequency, i.e., most frequent pattern first. """ # Check num_rows check_true(self._num_rows >= 1, ValueError("Sequences should not be empty.")) # Check min_frequency conditions # Given a set of `num_rows` sequences, we need to validate min_frequency. # In edge cases when min_frequency is not set properly, e.g. 0 or less than 1/num_rows, program will fail. validate_min_frequency(self._num_rows, min_frequency) if not self.batch_size: # Call Cython backend in a child process, write results to queue q = Queue() thread = Process(target=self._run_thread, args=(min_frequency, q)) thread.start() # Get results from queue patterns = q.get() # Wait for process to finish and join the main process thread.join() else: # When min_frequency is an integer, convert to float before it is applied to batches if isinstance(min_frequency, int): min_frequency = float(min_frequency) / len(self.sequences) patterns = self._get_patterns_batch(min_frequency) # Map back to strings, if original is strings if self._is_string: patterns = int_to_string(self._int_to_str, patterns) # Sort sequences, most frequent pattern first patterns_sorted = sort_pattern(patterns) # Clean up memory gc.collect() # Return frequent sequences return patterns_sorted
def _get_patterns_batch(self, min_frequency: float) -> List[list]: """Get patterns from each batch with a relaxed min_frequency threshold, then aggregate pattern frequencies by using the summation of frequencies from batches. Attributes ---------- min_frequency: float Min_frequency should be a float in (0.0, 1.0] for batch processing. It represents the minimum percentage of sequences (rows) a pattern should occur. Returns ------- List[list] where each inner list represents a frequent pattern in the form [event_1, event_2, event_3, ... event_n, frequency]. """ # Shuffle sequences uniformly to make each batch have similar pattern occurrences sequences, attr_to_cs = self._shuffle_data() # Get number of chunks n_sequences = len(sequences) num_chunks = n_sequences // self.batch_size if n_sequences % self.batch_size > 0: num_chunks += 1 # Run Seq2Pat in parallel on each chunk, with a lower min_frequency threshold to get a larger set of patterns batch_patterns = Parallel(n_jobs=self.n_jobs, require='sharedmem')( delayed(self._mining_batch)(i, sequences, attr_to_cs, min_frequency) for i in range(0, num_chunks)) # Aggregate patterns from each chunk over the counts and apply min_frequency threshold min_row_count = int(n_sequences * min_frequency) agg_patterns = aggregate_patterns(batch_patterns, min_row_count) return agg_patterns def _shuffle_data(self) -> Tuple[List[List], Dict[Attribute, Dict[str, _Constraint]]]: """Shuffle sequences and attributes before running seq2pat on batches. Returns ------- shuffled_sequences: List[List] The shuffled sequences. shuffled_attr_to_cs: Dict[Attribute, Dict[str, _Constraint]] The shuffled constraints. """ indices = list(range(len(self.sequences))) self._rng.shuffle(indices) shuffled_sequences = [self.sequences[i] for i in indices] shuffled_attr_to_cts = deepcopy(self.attr_to_cts) for attr in shuffled_attr_to_cts: for cs in shuffled_attr_to_cts[attr]: old_constraint = shuffled_attr_to_cts[attr][cs] new_constraint = deepcopy(old_constraint) shuffled_values = [old_constraint.attribute.values[i] for i in indices] new_constraint.attribute._set_values(shuffled_values) shuffled_attr_to_cts[attr][cs] = new_constraint return shuffled_sequences, shuffled_attr_to_cts def _mining_batch(self, chunk_ind: int, sequences: List[List], attr_to_cs: Dict[Attribute, Dict[str, _Constraint]], min_frequency: float) -> List[List]: """Implementor of pattern mining on each batch Attributes ---------- chunk_ind: int chunk index sequences: List[list] A list of sequences each with a list of events. The event values can be all strings or all integers. attr_to_cs: Dict[Attribute, Dict[str, _Constraint]] min_frequency: float Min_frequency should be a float in (0.0, 1.0] for batch processing It represents the minimum percentage of sequences (rows) a pattern should occur. Returns: ------- List[list] where each inner list represents a frequent pattern in the form [event_1, event_2, event_3, ... event_n, frequency]. The last element is the frequency of the pattern. Sequences are sored by decreasing frequency, i.e., most frequent pattern first. """ # Get one batch of sequences batch_sequences = sequences[chunk_ind * self.batch_size:(chunk_ind + 1) * self.batch_size] # Create seq2pat instance for one batch batch_seq2pat = Seq2Pat(batch_sequences, max_span=None, batch_size=None) # Create constraints for one batch for attr in attr_to_cs: for cs in attr_to_cs[attr]: old_constraint = attr_to_cs[attr][cs] new_constraint = deepcopy(old_constraint) new_constraint.attribute._set_values(old_constraint.attribute.values[chunk_ind * self.batch_size: (chunk_ind + 1) * self.batch_size]) batch_seq2pat.add_constraint(new_constraint) # Reduce min_frequency to be min_frequency * discount_factor, 0 < discount_factor < 1 adjusted_min_frequency = update_min_frequency(len(batch_sequences), min_frequency, self.discount_factor) return batch_seq2pat.get_patterns(adjusted_min_frequency) def _get_cython_imp(self, min_frequency) -> stp.PySeq2pat: """Creates and populates a Cython PySeq2Pat object based on the user inputs by translating sequential attribute into their appropriate PySeq2Pat representation and setting them. :param min_frequency: the minimum number of sequences(rows) to observe the pattern :return: PySeq2Pat object with all the necessary inputs set """ cython_imp = stp.PySeq2pat() # Dictionary to hold all parameters that need to be set in seq_to_pat more information about what each # parameter represents can be found under Constants declaration params = {_Constants.lgap: [], _Constants.ugap: [], _Constants.lavr: [], _Constants.uavr: [], _Constants.lspn: [], _Constants.uspn: [], _Constants.lmed: [], _Constants.umed: [], _Constants.ugapi: [], _Constants.lgapi: [], _Constants.uspni: [], _Constants.lspni: [], _Constants.uavri: [], _Constants.lavri: [], _Constants.umedi: [], _Constants.lmedi: [], _Constants.num_minmax: [], _Constants.num_avr: [], _Constants.num_med: [], _Constants.tot_gap: [], _Constants.tot_spn: [], _Constants.tot_avr: [], _Constants.num_att: 0, _Constants.items: self.sequences, _Constants.attrs: [], _Constants.M: self._max_num_columns, _Constants.N: self._num_rows, _Constants.L: self._max_value, _Constants.max_attrs: [], _Constants.min_attrs: []} # Iterate through all attributes and constraint and build parameters required by seq_to_pat for attribute, constraints in self.attr_to_cts.items(): params[_Constants.num_att] += 1 params[_Constants.num_minmax].append(0) params[_Constants.num_avr].append(0) params[_Constants.num_med].append(0) params[_Constants.max_attrs].append(attribute._max) params[_Constants.min_attrs].append(attribute._min) params[_Constants.attrs].append(attribute.values) for constraint_type, constraint in constraints.items(): if isinstance(constraint, _Constraint.Average): self._update_average_params(params, constraint) if isinstance(constraint, _Constraint.Gap): self._update_gap_params(params, constraint) if isinstance(constraint, _Constraint.Median): self._update_median_params(params, constraint) if isinstance(constraint, _Constraint.Span): self._update_span_params(params, constraint) # Given all the parameters, setup the c++ object through the intermediary cython seq_to_pat.pyx for constraint, value in params.items(): # print(constraint, " ", value) setattr(cython_imp, constraint, value) # Set frequency as a row count or row percentage. 1.0 will be used as percentage. if isinstance(min_frequency, float) and min_frequency <= 1.0: cython_imp.theta = cython_imp.N * min_frequency else: cython_imp.theta = min_frequency return cython_imp @staticmethod def _update_average_params(params: dict, constraint: _Constraint.Average) -> NoReturn: """Updates average constraint inputs for C++ backend :param constraint: average constraint obj """ # attribute id for attribute that a constraint will be enforced on att_id = params[_Constants.num_att] - 1 params[_Constants.tot_avr].append(att_id) if constraint.has_lower_bound(): params[_Constants.lavr].append(constraint.lower_bound) params[_Constants.lavri].append(att_id) params[_Constants.num_avr][-1] += 1 if constraint.has_upper_bound(): params[_Constants.uavr].append(constraint.upper_bound) params[_Constants.uavri].append(att_id) params[_Constants.num_avr][-1] += 1 @staticmethod def _update_gap_params(params: dict, constraint: _Constraint.Gap) -> NoReturn: """Update gap constraint inputs for C++ backend :param constraint: gap constraint obj """ # attribute id for attribute that a constraint will be enforced on att_id = params[_Constants.num_att] - 1 params[_Constants.tot_gap].append(att_id) if constraint.has_lower_bound(): params[_Constants.lgap].append(constraint.lower_bound) params[_Constants.lgapi].append(att_id) if constraint.has_upper_bound(): params[_Constants.ugap].append(constraint.upper_bound) params[_Constants.ugapi].append(att_id) @staticmethod def _update_median_params(params: dict, constraint: _Constraint.Median) -> NoReturn: """Updates median constraint inputs for C++ backend :param constraint: median constraint obj """ # attribute id for attribute that a constraint will be enforced on att_id = params[_Constants.num_att] - 1 if constraint.has_lower_bound(): params[_Constants.lmed].append(constraint.lower_bound) params[_Constants.lmedi].append(att_id) params[_Constants.num_med][-1] += 1 if constraint.has_upper_bound(): params[_Constants.umed].append(constraint.upper_bound) params[_Constants.umedi].append(att_id) params[_Constants.num_med][-1] += 1 @staticmethod def _update_span_params(params: dict, constraint: _Constraint.Span) -> NoReturn: """Updates span constraint inputs for C++ backend :param constraint: span constraint obj """ """attribute id for attribute that a constraint will be enforced on, we want att_id to start from zero but are using the number of attribute so we simply start from one.""" att_id = params[_Constants.num_att] - 1 params[_Constants.tot_spn].append(att_id) if constraint.has_lower_bound(): params[_Constants.lspn].append(constraint.lower_bound) params[_Constants.lspni].append(att_id) params[_Constants.num_minmax][-1] += 2 if constraint.has_upper_bound(): params[_Constants.uspn].append(constraint.upper_bound) params[_Constants.uspni].append(att_id) def __str__(self) -> str: """return human-readable string representation of the class """ str = "\n\nSeq2Pat" for attribute, constraints in self.attr_to_cts.items(): str += "\nConstraints of Attribute: " + repr(attribute) str += "\nConstraints: " + repr(constraints) for constraint_type, constraint in constraints.items(): str += "\nConstraint: " + constraint_type + " " + repr(constraint) str += "\nLB: " + repr(constraint.lower_bound) str += "\nUB: " + repr(constraint.upper_bound) return str