Autoencoder Dimensionality Error

Thanks for taking a look!



I have an autoencoder that I am trying to use for anomaly detection. I have two log files, logfile.log and testfile.log. They're essentially the same log file; I just split it for training and testing purposes. The log file consists of millions of lines, where each line is one log entry (one observation).

I have already preprocessed the files a little: each line is now a JSON dictionary representing one observation, with each key mapping to a value from that observation. The dictionaries are nested, but in my code I flatten them for simplicity's sake.
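
For example, a made-up nested entry (not from my real logs) flattens like this:

import json
from flatten_json import flatten

line = '{"Key1": "GET", "nested": {"inner": 42}}'
obs = flatten(json.loads(line))
print(obs)  # {'Key1': 'GET', 'nested_inner': 42}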



My goal is to train the autoencoder on logfile.log and then run the model against testfile.log line by line, outputting the reconstruction error for each entry. If the reconstruction error is large enough, I will flag the entry. (I haven't started on that portion yet.)



Model:



import keras
from keras.layers import Input, Dense
from keras.models import Model
import numpy as np
from tensorflow import set_random_seed  # TensorFlow 1.x API
import os
import json
from flatten_json import flatten
import time
from sklearn.preprocessing import LabelEncoder


vartime = time.time()
log_file = './logfile.log'


def seedy(s):
    np.random.seed(s)
    set_random_seed(s)


class AutoEncoder:
    def __init__(self, encoding_dim=31):
        self.encoding_dim = encoding_dim
        newdata = []

        with open(log_file, 'r') as file:
            counter = 0
            # Read roughly 40 MB worth of lines; each line is one JSON observation
            text = file.readlines(40000000)
            for line in text:
                data = []
                counter += 1
                buffer = flatten(json.loads(line))
                if counter % 50000 == 0:
                    print(counter)
                    print(time.time() - vartime)
                data.append(buffer.get("Key1", "None"))
                data.append(buffer.get("Key2", "None"))
                data.append(buffer.get("Key3", "None"))
                data.append(int(buffer.get("Key4", "None")[11:12]))
                data.append(buffer.get("key4", "None"))
                data.append(buffer.get("key5", "None"))
                data.append(buffer.get("key6", "None"))
                newdata.append(data)

        self.x = np.array(newdata)

        # Label-encode every categorical column (index 3 is already an int)
        for col in (0, 1, 2, 4, 5, 6):
            self.x[:, col] = LabelEncoder().fit_transform(self.x[:, col])

        # (N, 7) integer matrix -> (N, 7, num_classes) one-hot tensor
        self.x = keras.utils.to_categorical(self.x, dtype='float32')

    def _encoder(self):
        inputs = Input(shape=self.x[0].shape)
        encoded = Dense(self.encoding_dim, activation='relu')(inputs)
        model = Model(inputs, encoded)
        self.encoder = model
        return model

    def _decoder(self):
        decshape = self.x.shape[2]
        inputs = Input(shape=(self.encoding_dim,))
        decoded = Dense(decshape)(inputs)
        model = Model(inputs, decoded)
        self.decoder = model
        return model

    def encoder_decoder(self):
        ec = self._encoder()
        dc = self._decoder()

        inputs = Input(shape=self.x[0].shape)
        ec_out = ec(inputs)
        dc_out = dc(ec_out)
        model = Model(inputs, dc_out)

        self.model = model
        return model

    def fit(self, batch_size=10000, epochs=300):
        self.model.compile(optimizer='adam', loss='mse')
        log_dir = './log/'
        tbCallBack = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0,
                                                 write_graph=True, write_images=True)
        self.model.fit(self.x, self.x,
                       epochs=epochs,
                       batch_size=batch_size,
                       callbacks=[tbCallBack])

    def save(self):
        # Make sure the weights directory exists, then save all three models
        if not os.path.exists(r'./weights'):
            os.mkdir(r'./weights')
        self.encoder.save(r'./weights/encoder_weights.h5')
        self.decoder.save(r'./weights/decoder_weights.h5')
        self.model.save(r'./weights/ae_weights.h5')


if __name__ == '__main__':
    seedy(2)
    ae = AutoEncoder(encoding_dim=2)
    ae.encoder_decoder()
    ae.fit(batch_size=10000, epochs=300)
    ae.save()


Testing:



import keras
from keras.models import load_model
import numpy as np
import json
from flatten_json import flatten
from sklearn.preprocessing import LabelEncoder
import time

vartime = time.time()
encoder = load_model(r'./weights/encoder_weights.h5')
decoder = load_model(r'./weights/decoder_weights.h5')

log_file = "./testfile.log"

newdata = []
with open(log_file, 'r') as file:
    counter = 0
    text = file.readlines(40000000)
    for line in text:
        data = []
        counter += 1
        buffer = flatten(json.loads(line))
        if counter % 50000 == 0:
            print(counter)
            print(time.time() - vartime)
        data.append(buffer.get("Key1", "None"))
        data.append(buffer.get("Key2", "None"))
        data.append(buffer.get("Key3", "None"))
        data.append(int(buffer.get("Key4", "None")[11:12]))
        data.append(buffer.get("Key5", "None"))
        data.append(buffer.get("Key6", "None"))
        data.append(buffer.get("Key7", "None"))
        newdata.append(data)

newdata = np.array(newdata)

# Label-encode every categorical column (index 3 is already an int)
for col in (0, 1, 2, 4, 5, 6):
    newdata[:, col] = LabelEncoder().fit_transform(newdata[:, col])

newdata = keras.utils.to_categorical(newdata, dtype='float32')

inputs = newdata[0]          # shape (7, 31) -- the line the error points at
x = encoder.predict(inputs)
y = decoder.predict(x)

print('Input: {}'.format(inputs))
print('Encoded: {}'.format(x))
print('Decoded: {}'.format(y))


Error when running the testing portion:

x = encoder.predict(inputs)
...
    'with shape ' + str(data_shape))
ValueError: Error when checking input: expected input_8 to have 3 dimensions, but got array with shape (7, 31)



Note:
When I do "inputs = newdata" to make the input 3-dimensional, I get the same error, except it says a 2-D array was expected. I've even tried using logfile.log as the test file, since it's literally the same data used in training.
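
If the saved encoder really does expect a 3-D input of shape (batch, 7, 31), my best guess at a minimal fix, purely an assumption on my part, is that predict() needs an explicit batch axis:

# newdata[0] has shape (7, 31); predict() reads that as 7 samples of 31 features.
# Adding a batch axis gives shape (1, 7, 31), matching the 3-D input in the error.
inputs = np.expand_dims(newdata[0], axis=0)
x = encoder.predict(inputs)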



I also changed the actual key names to Key1, Key2, Key3, etc., because the code looked extremely messy otherwise. I do know that I am reading the files and one-hot encoding everything correctly, so that shouldn't be the issue, but I can restore the original key names if absolutely necessary.
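
One caveat I am still unsure about: the test script fits fresh LabelEncoders, so the same string could map to a different integer than it did at training time. A minimal sketch of reusing the training-time encoders instead (assuming joblib is available; the file name encoders.pkl is just an example):

import joblib
from sklearn.preprocessing import LabelEncoder

# Training side: fit one encoder per categorical column and persist them.
encoders = {}
for col in (0, 1, 2, 4, 5, 6):
    enc = LabelEncoder().fit(self.x[:, col])
    self.x[:, col] = enc.transform(self.x[:, col])
    encoders[col] = enc
joblib.dump(encoders, 'encoders.pkl')

# Testing side: transform (not fit_transform) with the saved encoders so the
# same string always maps to the same integer. Note that transform() raises
# on labels that never appeared during training.
encoders = joblib.load('encoders.pkl')
for col in (0, 1, 2, 4, 5, 6):
    newdata[:, col] = encoders[col].transform(newdata[:, col])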



Any help would be greatly appreciated. Let me know if any more information is needed, or if this is the wrong place for this question.

Thanks!

machine-learning deep-learning keras autoencoder
1 Answer

I can't say I know how to accomplish my intended goal, but I can say that my approach was faulty. An autoencoder's prediction is meant to regenerate its input, so running a separate encode step and then a decode step in the testing phase is rather wonky.

I will continue looking into ways to compute the reconstruction error.
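
For the record, a minimal sketch of what I mean by per-line reconstruction error, assuming the full saved model ae_weights.h5 and a test array newdata preprocessed exactly like the training data (the 99th-percentile threshold is only a placeholder):

import numpy as np
from keras.models import load_model

autoencoder = load_model('./weights/ae_weights.h5')

# Reconstruct every test sample with the full model, then score each one by
# its mean squared error over all features; large errors suggest anomalies.
reconstructed = autoencoder.predict(newdata)                  # (N, 7, 31)
errors = np.mean((newdata - reconstructed) ** 2, axis=(1, 2))

threshold = np.percentile(errors, 99)   # placeholder cut-off
flagged = np.where(errors > threshold)[0]
print('Flagged line indices:', flagged)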





