Libraries and Dataset¶

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
In [2]:
dataset = pd.read_csv('data/market_cluster.csv', encoding='latin1')
In [3]:
dataset.head()
Out[3]:
Order ID Customer Name Category Sub Category City Order Date Region Sales Discount Profit State profit_margin Cluster
0 OD1 Harish Oil & Masala Masalas Vellore 11-08-2017 North 1254 0.12 401.28 Tamil Nadu 0.32 Medium
1 OD2 Sudha Beverages Health Drinks Krishnagiri 11-08-2017 South 749 0.18 149.80 Tamil Nadu 0.20 Medium
2 OD3 Hussain Food Grains Atta & Flour Perambalur 06-12-2017 West 2360 0.21 165.20 Tamil Nadu 0.07 Low
3 OD4 Jackson Fruits & Veggies Fresh Vegetables Dharmapuri 10-11-2016 South 896 0.25 89.60 Tamil Nadu 0.10 Low
4 OD5 Ridhesh Food Grains Organic Staples Ooty 10-11-2016 South 2355 0.26 918.45 Tamil Nadu 0.39 High
In [4]:
dataset = pd.DataFrame(dataset)  # redundant: read_csv already returns a DataFrame
In [5]:
dataset.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9994 entries, 0 to 9993
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Order ID       9994 non-null   object 
 1   Customer Name  9994 non-null   object 
 2   Category       9994 non-null   object 
 3   Sub Category   9994 non-null   object 
 4   City           9994 non-null   object 
 5   Order Date     9994 non-null   object 
 6   Region         9994 non-null   object 
 7   Sales          9994 non-null   int64  
 8   Discount       9994 non-null   float64
 9   Profit         9994 non-null   float64
 10  State          9994 non-null   object 
 11  profit_margin  9994 non-null   float64
 12  Cluster        9994 non-null   object 
dtypes: float64(3), int64(1), object(9)
memory usage: 1015.1+ KB

Data Cleaning & Preprocessing¶

In [6]:
dataset.drop(['Order ID'], axis=1, inplace=True)
In [7]:
dataset.isna().sum()
Out[7]:
Customer Name    0
Category         0
Sub Category     0
City             0
Order Date       0
Region           0
Sales            0
Discount         0
Profit           0
State            0
profit_margin    0
Cluster          0
dtype: int64
In [8]:
dataset.dropna(inplace=True)  # no-op for this file: the check above shows zero missing values
In [9]:
def remove_outliers(data: pd.DataFrame, column: str) -> pd.DataFrame:
    # Keep only rows whose value lies within 1.5 * IQR of the quartiles.
    q3, q1 = np.nanpercentile(data[column], [75, 25])
    iqr = q3 - q1
    upper_bound = q3 + 1.5 * iqr
    lower_bound = q1 - 1.5 * iqr
    data = data[(data[column] > lower_bound) & (data[column] < upper_bound)]

    return data

dataset = remove_outliers(dataset, 'Discount')
dataset = remove_outliers(dataset, 'Sales')
dataset = remove_outliers(dataset, 'Profit')
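The three calls above filter the frame silently. An alternative to those calls (a sketch that reuses only names defined above) runs the same IQR filter in a loop and also reports how many rows were discarded:

# Sketch: same column-by-column IQR filtering, with a count of dropped rows.
rows_before = len(dataset)
for col in ['Discount', 'Sales', 'Profit']:
    dataset = remove_outliers(dataset, col)
print(f"Outlier removal dropped {rows_before - len(dataset)} of {rows_before} rows")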
In [10]:
dataset.head()
Out[10]:
Customer Name Category Sub Category City Order Date Region Sales Discount Profit State profit_margin Cluster
0 Harish Oil & Masala Masalas Vellore 11-08-2017 North 1254 0.12 401.28 Tamil Nadu 0.32 Medium
1 Sudha Beverages Health Drinks Krishnagiri 11-08-2017 South 749 0.18 149.80 Tamil Nadu 0.20 Medium
2 Hussain Food Grains Atta & Flour Perambalur 06-12-2017 West 2360 0.21 165.20 Tamil Nadu 0.07 Low
3 Jackson Fruits & Veggies Fresh Vegetables Dharmapuri 10-11-2016 South 896 0.25 89.60 Tamil Nadu 0.10 Low
4 Ridhesh Food Grains Organic Staples Ooty 10-11-2016 South 2355 0.26 918.45 Tamil Nadu 0.39 High
In [11]:
sns.histplot(dataset['Cluster'])
Out[11]:
<AxesSubplot:xlabel='Cluster', ylabel='Count'>
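Since Cluster is a categorical label rather than a continuous value, a count plot is the more conventional way to show the class balance; a minimal alternative sketch:

# Bar of record counts per cluster label (alternative to the histogram above)
sns.countplot(x='Cluster', data=dataset)
plt.show()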
In [12]:
encoder = LabelEncoder()
scaler = StandardScaler()
onehot = OneHotEncoder()
minmaxscaler = MinMaxScaler()
In [13]:
dataset["Order Date"] = pd.to_datetime(dataset["Order Date"])
dataset["Order Date"] = dataset["Order Date"].dt.month
dataset["Customer Name"] = encoder.fit_transform(dataset["Customer Name"])
dataset["Category"] = encoder.fit_transform(dataset["Category"])
dataset["City"] = encoder.fit_transform(dataset["City"])
dataset["Region"] = encoder.fit_transform(dataset["Region"])
dataset["State"] = encoder.fit_transform(dataset["State"])
dataset["Sub Category"] = encoder.fit_transform(dataset["Sub Category"])

# dataset["Order Date"] = pd.to_datetime(dataset["Order Date"])
# dataset["Order Date"] = dataset["Order Date"].dt.month
# dataset["Customer Name"] = onehot.fit_transform(dataset["Customer Name"].values.reshape(-1, 1)).toarray()
# dataset["Category"] = onehot.fit_transform(dataset["Category"].values.reshape(-1, 1)).toarray()
# dataset["City"] = onehot.fit_transform(dataset["City"].values.reshape(-1, 1)).toarray()
# dataset["Region"] = onehot.fit_transform(dataset["Region"].values.reshape(-1, 1)).toarray()
# dataset["State"] = onehot.fit_transform(dataset["State"].values.reshape(-1, 1)).toarray()
# dataset["Sub Category"] = onehot.fit_transform(dataset["Sub Category"].values.reshape(-1, 1)).toarray()
dataset["Order Date"] = encoder.fit_transform(dataset["Order Date"])
In [14]:
dataset[["Sales", "Discount", "profit_margin"]] = scaler.fit_transform(dataset[["Sales", "Discount", "profit_margin"]])
dataset["Profit"] = minmaxscaler.fit_transform(dataset["Profit"].values.reshape(-1, 1))
In [15]:
class_to_numeric = {'Low': 0, 'Medium': 1, 'High': 2}
dataset['Cluster'] = [class_to_numeric[label] for label in dataset['Cluster']]
In [16]:
dataset.head()
Out[16]:
Customer Name Category Sub Category City Order Date Region Sales Discount Profit State profit_margin Cluster
0 12 5 14 21 10 2 -0.414559 -1.430908 0.369225 0 0.595874 1
1 37 1 13 8 10 3 -1.291968 -0.627370 0.122296 0 -0.416872 1
2 14 3 0 13 5 4 1.507054 -0.225601 0.137417 0 -1.514014 0
3 15 4 12 4 9 3 -1.036563 0.310092 0.063185 0 -1.260827 0
4 28 3 18 12 9 3 1.498367 0.444015 0.877036 0 1.186643 2

Split Data¶

In [17]:
X = dataset.drop(['Cluster','Sub Category','State','Profit','profit_margin'],axis=1)
y = dataset['Profit']
In [18]:
X.head()
Out[18]:
Customer Name Category City Order Date Region Sales Discount
0 12 5 21 10 2 -0.414559 -1.430908
1 37 1 8 10 3 -1.291968 -0.627370
2 14 3 13 5 4 1.507054 -0.225601
3 15 4 4 9 3 -1.036563 0.310092
4 28 3 12 9 3 1.498367 0.444015
In [19]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
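Note that the scalers in cell [14] were fit on the full dataset before this split, so validation- and test-set statistics leak into the preprocessing. A leakage-free variant (a sketch, assuming the earlier full-dataset fit is skipped and the raw Sales/Discount values are still in place) fits on the training partition only:

# Scale using statistics from the training split only; the fitted scaler is then
# reused, unchanged, for the validation and test splits.
num_cols = ["Sales", "Discount"]  # numeric feature columns kept in X
X_train[num_cols] = scaler.fit_transform(X_train[num_cols])
X_val[num_cols] = scaler.transform(X_val[num_cols])
X_test[num_cols] = scaler.transform(X_test[num_cols])
# The Profit target can be handled the same way with minmaxscaler on y_train / y_val / y_test.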
In [20]:
heatcol = X.corr()
sns.heatmap(heatcol,cmap="BrBG",annot=True)
Out[20]:
<AxesSubplot:>
In [21]:
print("Dimension of Train set",X_train.shape)
print("Dimension of Val set",X_val.shape)
print("Dimension of Test set",X_test.shape,"\n")

num_cols = X_train.select_dtypes(include=np.number).columns
print("Number of numeric features:",num_cols.size)
Dimension of Train set (7960, 7)
Dimension of Val set (995, 7)
Dimension of Test set (996, 7) 

Number of numeric features: 7

LSTM Regressor¶

In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from scikeras.wrappers import KerasRegressor
In [23]:
def create_model(units=64, optimizer='adam', loss='mae'):
    model = Sequential()
    model.add(LSTM(units, input_shape=(X_train.shape[1], 1)))
    model.add(Dense(units, activation='relu'))
    model.compile(optimizer=optimizer, loss=loss, metrics=['mean_absolute_error'])
    
    return model
In [24]:
model = KerasRegressor(build_fn=create_model, units=32, epochs=100, batch_size=32, verbose=0)

param_grid = {
    'optimizer': ['adam', 'sgd', 'rmsprop'],
    'units': [32, 64, 128],
    'loss' : ['mae', 'mse']
}

grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=2, scoring='accuracy')
grid_result = grid.fit(X_train, y_train)
c:\Users\Asus\anaconda3\lib\site-packages\scikeras\wrappers.py:915: UserWarning: ``build_fn`` will be renamed to ``model`` in a future release, at which point use of ``build_fn`` will raise an Error instead.
  X, y = self._initialize(X, y)
c:\Users\Asus\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:821: UserWarning: Scoring failed. The score on this train-test partition for these parameters will be set to nan. Details: 
Traceback (most recent call last):
  File "c:\Users\Asus\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 810, in _score
    scores = scorer(estimator, X_test, y_test)
  File "c:\Users\Asus\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py", line 266, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
  File "c:\Users\Asus\anaconda3\lib\site-packages\sklearn\metrics\_scorer.py", line 355, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "c:\Users\Asus\anaconda3\lib\site-packages\sklearn\utils\_param_validation.py", line 214, in wrapper
    return func(*args, **kwargs)
  File "c:\Users\Asus\anaconda3\lib\site-packages\sklearn\metrics\_classification.py", line 220, in accuracy_score
    y_type, y_true, y_pred = _check_targets(y_true, y_pred)
  File "c:\Users\Asus\anaconda3\lib\site-packages\sklearn\metrics\_classification.py", line 84, in _check_targets
    check_consistent_length(y_true, y_pred)
  File "c:\Users\Asus\anaconda3\lib\site-packages\sklearn\utils\validation.py", line 407, in check_consistent_length
    raise ValueError(
ValueError: Found input variables with inconsistent numbers of samples: [3980, 127360]

  warnings.warn(
(The two warnings above are emitted for every fold and parameter combination; the second sample count scales with `units`: 254720 for 64 units, 509440 for 128 units.)
c:\Users\Asus\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py:425: FitFailedWarning: 
18 fits failed out of a total of 36.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
18 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Asus\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Asus\anaconda3\lib\site-packages\scikeras\wrappers.py", line 760, in fit
    self._fit(
  File "c:\Users\Asus\anaconda3\lib\site-packages\scikeras\wrappers.py", line 926, in _fit
    self._check_model_compatibility(y)
  File "c:\Users\Asus\anaconda3\lib\site-packages\scikeras\wrappers.py", line 569, in _check_model_compatibility
    raise ValueError(
ValueError: loss=mse but model compiled with mae. Data may not match loss function!

  warnings.warn(some_fits_failed_message, FitFailedWarning)
c:\Users\Asus\anaconda3\lib\site-packages\sklearn\model_selection\_search.py:979: UserWarning: One or more of the test scores are non-finite: [nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan]
  warnings.warn(
c:\Users\Asus\anaconda3\lib\site-packages\scikeras\wrappers.py:915: UserWarning: ``build_fn`` will be renamed to ``model`` in a future release, at which point use of ``build_fn`` will raise an Error instead.
  X, y = self._initialize(X, y)
In [25]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
Best: nan using {'loss': 'mae', 'optimizer': 'adam', 'units': 32}
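The best score is nan because every one of the 36 fits either failed or could not be scored. The 'mse' candidates clash with the 'mae' loss hard-coded in create_model's compile call, which is the FitFailedWarning above. The remaining fits are scored with scoring='accuracy', a classification metric, against a network whose output layer has `units` neurons rather than one, so the predictions contain 3980 x units values and the scorer raises the "inconsistent numbers of samples" error. With all scores nan, best_params_ simply falls back to the first grid entry. A sketch of one way to repair the search (assuming the SciKeras wrapper is allowed to compile the model itself when the build function returns it uncompiled):

# Sketch of a corrected setup, not the notebook's original code.
def create_model(units=64):
    model = Sequential()
    model.add(LSTM(units, input_shape=(X_train.shape[1], 1)))
    model.add(Dense(1))   # a single output unit for the single regression target
    return model          # left uncompiled: SciKeras compiles it with the wrapper's loss/optimizer

model = KerasRegressor(model=create_model, units=32, optimizer='adam', loss='mae',
                       epochs=100, batch_size=32, verbose=0)

param_grid = {
    'optimizer': ['adam', 'sgd', 'rmsprop'],
    'units': [32, 64, 128],
    'loss': ['mae', 'mse'],
}

# Use a regression scorer; 'accuracy' only applies to classification targets.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=2,
                    scoring='neg_mean_absolute_error')
grid_result = grid.fit(X_train, y_train)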
In [26]:
optimizer = grid_result.best_params_['optimizer']
units = grid_result.best_params_['units']
loss = grid_result.best_params_['loss']
In [27]:
model = Sequential()
model.add(LSTM(units, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(Dense(1))

model.compile(optimizer=optimizer, loss=loss, metrics=['mean_absolute_error'])
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val))

# evaluate() returns [loss, mean_absolute_error] because a metric was compiled in
test_loss, test_mae = model.evaluate(X_test, y_test)
print(f'Mean Absolute Error on Test Data: {test_mae}')
Epoch 1/100
249/249 [==============================] - 3s 6ms/step - loss: 0.1708 - mean_absolute_error: 0.1708 - val_loss: 0.1632 - val_mean_absolute_error: 0.1632
Epoch 2/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1559 - mean_absolute_error: 0.1559 - val_loss: 0.1557 - val_mean_absolute_error: 0.1557
Epoch 3/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1545 - mean_absolute_error: 0.1545 - val_loss: 0.1544 - val_mean_absolute_error: 0.1544
Epoch 4/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1529 - mean_absolute_error: 0.1529 - val_loss: 0.1544 - val_mean_absolute_error: 0.1544
Epoch 5/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1526 - mean_absolute_error: 0.1526 - val_loss: 0.1536 - val_mean_absolute_error: 0.1536
Epoch 6/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1517 - mean_absolute_error: 0.1517 - val_loss: 0.1534 - val_mean_absolute_error: 0.1534
Epoch 7/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1515 - mean_absolute_error: 0.1515 - val_loss: 0.1537 - val_mean_absolute_error: 0.1537
Epoch 8/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1510 - mean_absolute_error: 0.1510 - val_loss: 0.1543 - val_mean_absolute_error: 0.1543
Epoch 9/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1514 - mean_absolute_error: 0.1514 - val_loss: 0.1527 - val_mean_absolute_error: 0.1527
Epoch 10/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1515 - mean_absolute_error: 0.1515 - val_loss: 0.1549 - val_mean_absolute_error: 0.1549
Epoch 11/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1513 - mean_absolute_error: 0.1513 - val_loss: 0.1525 - val_mean_absolute_error: 0.1525
Epoch 12/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1509 - mean_absolute_error: 0.1509 - val_loss: 0.1529 - val_mean_absolute_error: 0.1529
Epoch 13/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1511 - mean_absolute_error: 0.1511 - val_loss: 0.1531 - val_mean_absolute_error: 0.1531
Epoch 14/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1511 - mean_absolute_error: 0.1511 - val_loss: 0.1533 - val_mean_absolute_error: 0.1533
Epoch 15/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1506 - mean_absolute_error: 0.1506 - val_loss: 0.1526 - val_mean_absolute_error: 0.1526
Epoch 16/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1508 - mean_absolute_error: 0.1508 - val_loss: 0.1552 - val_mean_absolute_error: 0.1552
Epoch 17/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1509 - mean_absolute_error: 0.1509 - val_loss: 0.1530 - val_mean_absolute_error: 0.1530
Epoch 18/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1508 - mean_absolute_error: 0.1508 - val_loss: 0.1528 - val_mean_absolute_error: 0.1528
Epoch 19/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1508 - mean_absolute_error: 0.1508 - val_loss: 0.1526 - val_mean_absolute_error: 0.1526
Epoch 20/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1506 - mean_absolute_error: 0.1506 - val_loss: 0.1531 - val_mean_absolute_error: 0.1531
Epoch 21/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1506 - mean_absolute_error: 0.1506 - val_loss: 0.1533 - val_mean_absolute_error: 0.1533
Epoch 22/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1505 - mean_absolute_error: 0.1505 - val_loss: 0.1530 - val_mean_absolute_error: 0.1530
Epoch 23/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1508 - mean_absolute_error: 0.1508 - val_loss: 0.1533 - val_mean_absolute_error: 0.1533
Epoch 24/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1506 - mean_absolute_error: 0.1506 - val_loss: 0.1527 - val_mean_absolute_error: 0.1527
Epoch 25/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1504 - mean_absolute_error: 0.1504 - val_loss: 0.1526 - val_mean_absolute_error: 0.1526
Epoch 26/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1509 - mean_absolute_error: 0.1509 - val_loss: 0.1524 - val_mean_absolute_error: 0.1524
Epoch 27/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1502 - mean_absolute_error: 0.1502 - val_loss: 0.1521 - val_mean_absolute_error: 0.1521
Epoch 28/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1500 - mean_absolute_error: 0.1500 - val_loss: 0.1525 - val_mean_absolute_error: 0.1525
Epoch 29/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1503 - mean_absolute_error: 0.1503 - val_loss: 0.1525 - val_mean_absolute_error: 0.1525
Epoch 30/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1502 - mean_absolute_error: 0.1502 - val_loss: 0.1522 - val_mean_absolute_error: 0.1522
Epoch 31/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1499 - mean_absolute_error: 0.1499 - val_loss: 0.1523 - val_mean_absolute_error: 0.1523
Epoch 32/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1503 - mean_absolute_error: 0.1503 - val_loss: 0.1521 - val_mean_absolute_error: 0.1521
Epoch 33/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1501 - mean_absolute_error: 0.1501 - val_loss: 0.1527 - val_mean_absolute_error: 0.1527
Epoch 34/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1501 - mean_absolute_error: 0.1501 - val_loss: 0.1528 - val_mean_absolute_error: 0.1528
Epoch 35/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1500 - mean_absolute_error: 0.1500 - val_loss: 0.1526 - val_mean_absolute_error: 0.1526
Epoch 36/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1501 - mean_absolute_error: 0.1501 - val_loss: 0.1536 - val_mean_absolute_error: 0.1536
Epoch 37/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1500 - mean_absolute_error: 0.1500 - val_loss: 0.1522 - val_mean_absolute_error: 0.1522
Epoch 38/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1500 - mean_absolute_error: 0.1500 - val_loss: 0.1525 - val_mean_absolute_error: 0.1525
Epoch 39/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1501 - mean_absolute_error: 0.1501 - val_loss: 0.1525 - val_mean_absolute_error: 0.1525
Epoch 40/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1498 - mean_absolute_error: 0.1498 - val_loss: 0.1524 - val_mean_absolute_error: 0.1524
Epoch 41/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1502 - mean_absolute_error: 0.1502 - val_loss: 0.1524 - val_mean_absolute_error: 0.1524
Epoch 42/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1494 - mean_absolute_error: 0.1494 - val_loss: 0.1525 - val_mean_absolute_error: 0.1525
Epoch 43/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1499 - mean_absolute_error: 0.1499 - val_loss: 0.1533 - val_mean_absolute_error: 0.1533
Epoch 44/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1497 - mean_absolute_error: 0.1497 - val_loss: 0.1541 - val_mean_absolute_error: 0.1541
Epoch 45/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1498 - mean_absolute_error: 0.1498 - val_loss: 0.1551 - val_mean_absolute_error: 0.1551
Epoch 46/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1500 - mean_absolute_error: 0.1500 - val_loss: 0.1520 - val_mean_absolute_error: 0.1520
Epoch 47/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1495 - mean_absolute_error: 0.1495 - val_loss: 0.1521 - val_mean_absolute_error: 0.1521
Epoch 48/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1494 - mean_absolute_error: 0.1494 - val_loss: 0.1522 - val_mean_absolute_error: 0.1522
Epoch 49/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1495 - mean_absolute_error: 0.1495 - val_loss: 0.1531 - val_mean_absolute_error: 0.1531
Epoch 50/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1496 - mean_absolute_error: 0.1496 - val_loss: 0.1523 - val_mean_absolute_error: 0.1523
Epoch 51/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1494 - mean_absolute_error: 0.1494 - val_loss: 0.1535 - val_mean_absolute_error: 0.1535
Epoch 52/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1495 - mean_absolute_error: 0.1495 - val_loss: 0.1527 - val_mean_absolute_error: 0.1527
Epoch 53/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1495 - mean_absolute_error: 0.1495 - val_loss: 0.1537 - val_mean_absolute_error: 0.1537
Epoch 54/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1491 - mean_absolute_error: 0.1491 - val_loss: 0.1523 - val_mean_absolute_error: 0.1523
Epoch 55/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1494 - mean_absolute_error: 0.1494 - val_loss: 0.1525 - val_mean_absolute_error: 0.1525
Epoch 56/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1492 - mean_absolute_error: 0.1492 - val_loss: 0.1521 - val_mean_absolute_error: 0.1521
Epoch 57/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1491 - mean_absolute_error: 0.1491 - val_loss: 0.1526 - val_mean_absolute_error: 0.1526
Epoch 58/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1493 - mean_absolute_error: 0.1493 - val_loss: 0.1523 - val_mean_absolute_error: 0.1523
Epoch 59/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1491 - mean_absolute_error: 0.1491 - val_loss: 0.1530 - val_mean_absolute_error: 0.1530
Epoch 60/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1488 - mean_absolute_error: 0.1488 - val_loss: 0.1523 - val_mean_absolute_error: 0.1523
Epoch 61/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1487 - mean_absolute_error: 0.1487 - val_loss: 0.1529 - val_mean_absolute_error: 0.1529
Epoch 62/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1488 - mean_absolute_error: 0.1488 - val_loss: 0.1533 - val_mean_absolute_error: 0.1533
Epoch 63/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1485 - mean_absolute_error: 0.1485 - val_loss: 0.1540 - val_mean_absolute_error: 0.1540
Epoch 64/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1484 - mean_absolute_error: 0.1484 - val_loss: 0.1532 - val_mean_absolute_error: 0.1532
Epoch 65/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1484 - mean_absolute_error: 0.1484 - val_loss: 0.1528 - val_mean_absolute_error: 0.1528
Epoch 66/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1485 - mean_absolute_error: 0.1485 - val_loss: 0.1544 - val_mean_absolute_error: 0.1544
Epoch 67/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1487 - mean_absolute_error: 0.1487 - val_loss: 0.1543 - val_mean_absolute_error: 0.1543
Epoch 68/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1484 - mean_absolute_error: 0.1484 - val_loss: 0.1526 - val_mean_absolute_error: 0.1526
Epoch 69/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1481 - mean_absolute_error: 0.1481 - val_loss: 0.1541 - val_mean_absolute_error: 0.1541
Epoch 70/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1478 - mean_absolute_error: 0.1478 - val_loss: 0.1529 - val_mean_absolute_error: 0.1529
Epoch 71/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1479 - mean_absolute_error: 0.1479 - val_loss: 0.1532 - val_mean_absolute_error: 0.1532
Epoch 72/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1478 - mean_absolute_error: 0.1478 - val_loss: 0.1528 - val_mean_absolute_error: 0.1528
Epoch 73/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1479 - mean_absolute_error: 0.1479 - val_loss: 0.1538 - val_mean_absolute_error: 0.1538
Epoch 74/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1476 - mean_absolute_error: 0.1476 - val_loss: 0.1533 - val_mean_absolute_error: 0.1533
Epoch 75/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1477 - mean_absolute_error: 0.1477 - val_loss: 0.1532 - val_mean_absolute_error: 0.1532
Epoch 76/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1475 - mean_absolute_error: 0.1475 - val_loss: 0.1526 - val_mean_absolute_error: 0.1526
Epoch 77/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1474 - mean_absolute_error: 0.1474 - val_loss: 0.1535 - val_mean_absolute_error: 0.1535
Epoch 78/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1473 - mean_absolute_error: 0.1473 - val_loss: 0.1534 - val_mean_absolute_error: 0.1534
Epoch 79/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1473 - mean_absolute_error: 0.1473 - val_loss: 0.1531 - val_mean_absolute_error: 0.1531
Epoch 80/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1474 - mean_absolute_error: 0.1474 - val_loss: 0.1531 - val_mean_absolute_error: 0.1531
Epoch 81/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1471 - mean_absolute_error: 0.1471 - val_loss: 0.1540 - val_mean_absolute_error: 0.1540
Epoch 82/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1471 - mean_absolute_error: 0.1471 - val_loss: 0.1536 - val_mean_absolute_error: 0.1536
Epoch 83/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1466 - mean_absolute_error: 0.1466 - val_loss: 0.1540 - val_mean_absolute_error: 0.1540
Epoch 84/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1469 - mean_absolute_error: 0.1469 - val_loss: 0.1533 - val_mean_absolute_error: 0.1533
Epoch 85/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1466 - mean_absolute_error: 0.1466 - val_loss: 0.1535 - val_mean_absolute_error: 0.1535
Epoch 86/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1466 - mean_absolute_error: 0.1466 - val_loss: 0.1555 - val_mean_absolute_error: 0.1555
Epoch 87/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1464 - mean_absolute_error: 0.1464 - val_loss: 0.1542 - val_mean_absolute_error: 0.1542
Epoch 88/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1463 - mean_absolute_error: 0.1463 - val_loss: 0.1539 - val_mean_absolute_error: 0.1539
Epoch 89/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1461 - mean_absolute_error: 0.1461 - val_loss: 0.1540 - val_mean_absolute_error: 0.1540
Epoch 90/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1461 - mean_absolute_error: 0.1461 - val_loss: 0.1544 - val_mean_absolute_error: 0.1544
Epoch 91/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1459 - mean_absolute_error: 0.1459 - val_loss: 0.1545 - val_mean_absolute_error: 0.1545
Epoch 92/100
249/249 [==============================] - 1s 6ms/step - loss: 0.1459 - mean_absolute_error: 0.1459 - val_loss: 0.1550 - val_mean_absolute_error: 0.1550
Epoch 93/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1458 - mean_absolute_error: 0.1458 - val_loss: 0.1578 - val_mean_absolute_error: 0.1578
Epoch 94/100
249/249 [==============================] - 1s 6ms/step - loss: 0.1456 - mean_absolute_error: 0.1456 - val_loss: 0.1543 - val_mean_absolute_error: 0.1543
Epoch 95/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1452 - mean_absolute_error: 0.1452 - val_loss: 0.1548 - val_mean_absolute_error: 0.1548
Epoch 96/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1455 - mean_absolute_error: 0.1455 - val_loss: 0.1548 - val_mean_absolute_error: 0.1548
Epoch 97/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1451 - mean_absolute_error: 0.1451 - val_loss: 0.1550 - val_mean_absolute_error: 0.1550
Epoch 98/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1449 - mean_absolute_error: 0.1449 - val_loss: 0.1556 - val_mean_absolute_error: 0.1556
Epoch 99/100
249/249 [==============================] - 1s 5ms/step - loss: 0.1451 - mean_absolute_error: 0.1451 - val_loss: 0.1541 - val_mean_absolute_error: 0.1541
Epoch 100/100
249/249 [==============================] - 1s 4ms/step - loss: 0.1446 - mean_absolute_error: 0.1446 - val_loss: 0.1569 - val_mean_absolute_error: 0.1569
32/32 [==============================] - 0s 2ms/step - loss: 0.1514 - mean_absolute_error: 0.1514
Mean Absolute Error on Test Data: [0.15143892168998718, 0.15143892168998718]
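The validation MAE stops improving around epoch 56 (≈0.152) and drifts slightly upward afterwards, so the remaining epochs add little. Below is a minimal early-stopping sketch, assuming the same compiled model and training arrays used above; the batch size and validation split are placeholders, not values taken from the original cell.

from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has not improved for 10 epochs and keep the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    X_train, y_train,            # same training arrays as in the fit cell above
    validation_split=0.2,        # placeholder: match the split used originally
    epochs=100,
    batch_size=32,               # placeholder batch size
    callbacks=[early_stop],
    verbose=1,
)

With restore_best_weights=True, the model keeps the weights from its best validation epoch rather than from the final epoch.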
In [28]:
from tensorflow.keras.utils import plot_model
model.summary()
plot_model(model, to_file='lstm_regression.png', show_shapes=True)
Model: "sequential_37"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 lstm_37 (LSTM)              (None, 32)                4352      
                                                                 
 dense_37 (Dense)            (None, 1)                 33        
                                                                 
=================================================================
Total params: 4385 (17.13 KB)
Trainable params: 4385 (17.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.
In [29]:
import joblib
joblib.dump(model, 'model/lstmr_model.pkl')
INFO:tensorflow:Assets written to: C:\Users\Asus\AppData\Local\Temp\tmpa46czm2h\assets
Out[29]:
['model/lstmr_model.pkl']
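joblib pickles the Keras model by first serialising it to a temporary SavedModel directory (the tmp assets path logged above). Keras' own save/load API is usually more portable for this. A hedged sketch with an illustrative file name follows; older Keras versions may need the HDF5 form ('model/lstm_regressor.h5') instead of the '.keras' archive.

# Save and reload with the native Keras API instead of pickling via joblib.
model.save('model/lstm_regressor.keras')

from tensorflow.keras.models import load_model
restored_model = load_model('model/lstm_regressor.keras')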
In [30]:
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Loss over epochs')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='best')

plt.subplot(1, 2, 2)
plt.plot(history.history['mean_absolute_error'])
plt.plot(history.history['val_mean_absolute_error'])
plt.title('MAE over epochs')
plt.ylabel('Mean Absolute Error')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='best')

plt.tight_layout()
plt.show()
In [31]:
y_test = y_test.values.reshape(-1, 1)
In [32]:
predictions_scaled = model.predict(X_test)
y_pred = minmaxscaler.inverse_transform(predictions_scaled)
y_test = minmaxscaler.inverse_transform(y_test)
32/32 [==============================] - 0s 2ms/step
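Note that inverse_transform only recovers the original Sales scale if minmaxscaler was fitted on the target column alone. A sketch of making that explicit with a dedicated target scaler is shown below; the y_scaler name is illustrative, and y_train/y_test are assumed to be the unscaled target Series from the split.

from sklearn.preprocessing import MinMaxScaler

# Fit a scaler on the training target only, then reuse it for the test target
# and for inverting the model's predictions.
y_scaler = MinMaxScaler()
y_train_scaled = y_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_test_scaled = y_scaler.transform(y_test.values.reshape(-1, 1))

# y_pred = y_scaler.inverse_transform(model.predict(X_test))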
In [33]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import r2_score

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)


print(f'Mean Squared Error: {mse}')
print(f'Mean Absolute Error: {mae}')
print(f'Root Mean Squared Error: {rmse} \n')
print(f'R-squared: {r2}')
Mean Squared Error: 37540.84429850945
Mean Absolute Error: 154.22993847735916
Root Mean Squared Error: 193.75459813513962 

R-squared: 0.2650996939108293
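An R² of about 0.27 means the model explains roughly a quarter of the variance in Sales; "accuracy" is not defined for regression, so a scale-free error such as MAPE is a more meaningful complement to MAE and RMSE. A small sketch (requires scikit-learn 0.24 or newer):

from sklearn.metrics import mean_absolute_percentage_error

# Average relative error of the predictions, independent of the Sales scale.
mape = mean_absolute_percentage_error(y_test, y_pred)
print(f'Mean Absolute Percentage Error: {mape:.4f}')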
In [34]:
plt.scatter(y_test, y_pred, label="Predicted", color='red')
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], linestyle='--', color='blue', label='Perfect Prediction')  # Reference line for perfect prediction
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.legend()
plt.title('Actual vs Predicted Values')
plt.show()
In [35]:
# Visualization
plt.figure(figsize=(12, 6))

# Plot actual values
plt.plot(y_test, label='Actual', color='blue')

# Plot predictions
plt.plot(y_pred, label='Predicted', color='red')

plt.title('LSTM Regression - Actual vs Predicted')
plt.xlabel('Time')
plt.ylabel('Value')
plt.legend()
plt.show()
In [36]:
# Grouping and averaging 'y_test' and 'y_pred' values
group_size = 10  # Define the size of each group
averaged_y_test = [np.mean(y_test[i:i + group_size]) for i in range(0, len(y_test), group_size)]
averaged_y_pred = [np.mean(y_pred[i:i + group_size]) for i in range(0, len(y_pred), group_size)]

# Plotting the averaged values
plt.figure(figsize=(12, 6))
plt.plot(averaged_y_test, label='Actual (Averaged)', color='blue')
plt.plot(averaged_y_pred, label='Predicted (Averaged)', color='red')

plt.title('LSTM Regression - Averaged Actual vs Predicted')
plt.xlabel('Grouped Datapoints')
plt.ylabel('Average Value')
plt.legend()
plt.show()
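The same smoothing can also be expressed as a pandas rolling mean instead of manual slicing. A sketch, assuming y_test and y_pred are the (n, 1) arrays used above and pd/plt are the pandas and matplotlib aliases imported at the top of the notebook:

# Rolling 10-point averages of the actual and predicted Sales values.
smoothed = pd.DataFrame({
    'Actual': y_test.ravel(),
    'Predicted': y_pred.ravel(),
}).rolling(window=10).mean()

smoothed.plot(figsize=(12, 6), color=['blue', 'red'],
              title='LSTM Regression - Rolling-Mean Actual vs Predicted')
plt.xlabel('Datapoint')
plt.ylabel('Average Sales')
plt.show()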