# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
# Load data
df = pd.read_csv("data.csv")  # Replace with your dataset

# Forward-fill missing values.
# fillna(method="ffill") is deprecated since pandas 2.1 -> use DataFrame.ffill().
df = df.ffill()
# Label-encode every categorical (object-dtype) column.
# One encoder per column is kept in `encoders` so each mapping can be
# inverted later; `le` stays bound (last fitted encoder) for downstream reuse.
encoders = {}
for col in df.select_dtypes(include="object").columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le
# Feature / target split
X = df.drop("target", axis=1)  # Replace 'target' with the actual label column
y = df["target"]
# Standardize features (zero mean, unit variance) — helps LR, SVM and the DNN
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Train/test split: 80/20, fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)
# ---------------- scikit-learn models ----------------

# Random Forest (fixed seed so results are reproducible across runs)
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)
rf_pred = rf.predict(X_test)
print("\n[Random Forest]")
print("Accuracy:", accuracy_score(y_test, rf_pred))
print(classification_report(y_test, rf_pred))
# Logistic Regression
# max_iter raised from the default 100 so the lbfgs solver converges
# on wider / less well-conditioned datasets instead of warning.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)
print("\n[Logistic Regression]")
print("Accuracy:", accuracy_score(y_test, lr_pred))
print(classification_report(y_test, lr_pred))
# Decision Tree (fixed seed: tie-breaking among equal splits is randomized)
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
dt_pred = dt.predict(X_test)
print("\n[Decision Tree]")
print("Accuracy:", accuracy_score(y_test, dt_pred))
print(classification_report(y_test, dt_pred))
# Support Vector Machine (RBF kernel by default; benefits from scaled features)
svc = SVC()
svc.fit(X_train, y_train)
svc_pred = svc.predict(X_test)
print("\n[SVM]")
print("Accuracy:", accuracy_score(y_test, svc_pred))
print(classification_report(y_test, svc_pred))
# ---------------- TensorFlow feedforward DNN ----------------
print("\n[TensorFlow DNN]")

# Binary targets: single sigmoid unit + binary cross-entropy.
# Multiclass: one softmax unit per class + sparse categorical cross-entropy
# (labels are integer-encoded above, so the "sparse" variant is correct).
n_classes = len(np.unique(y))
output_units = 1 if n_classes == 2 else n_classes
loss_fn = 'binary_crossentropy' if output_units == 1 else 'sparse_categorical_crossentropy'
activation_fn = 'sigmoid' if output_units == 1 else 'softmax'

model_dnn = Sequential()
model_dnn.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
model_dnn.add(Dense(64, activation='relu'))
model_dnn.add(Dense(output_units, activation=activation_fn))
model_dnn.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
model_dnn.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.1)
loss, acc = model_dnn.evaluate(X_test, y_test)
print("DNN Accuracy:", acc)
# ---------------- CNN (optional — for image data like MNIST) ----------------
# Disabled by default. To enable, remove the triple quotes and also import:
#   from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten
'''
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.reshape(-1, 28, 28, 1) / 255.0  # add channel dim, scale to [0, 1]
X_test = X_test.reshape(-1, 28, 28, 1) / 255.0
cnn_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])
cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
cnn_model.fit(X_train, y_train, epochs=5, validation_split=0.1)
loss, acc = cnn_model.evaluate(X_test, y_test)
print("CNN Accuracy:", acc)
'''
# ---------------- LSTM (optional — for text sequences) ----------------
# Disabled by default. To enable, remove the triple quotes and also import:
#   from tensorflow.keras.preprocessing.text import Tokenizer
#   from tensorflow.keras.preprocessing.sequence import pad_sequences
#   from tensorflow.keras.layers import Embedding, LSTM
# NOTE(review): this reuses output_units / activation_fn / loss_fn from the
# DNN section above — run that section first (or recompute them here).
'''
texts = df["text_column"]  # Replace with your actual text column
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(texts)
X_seq = tokenizer.texts_to_sequences(texts)
X_pad = pad_sequences(X_seq, maxlen=100)
y = le.fit_transform(df["target"])  # Reuse label encoder
X_train, X_test, y_train, y_test = train_test_split(X_pad, y, test_size=0.2, random_state=42)
lstm_model = Sequential([
    Embedding(input_dim=10000, output_dim=64, input_length=100),
    LSTM(64),
    Dense(output_units, activation=activation_fn),
])
lstm_model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
lstm_model.fit(X_train, y_train, epochs=5, batch_size=16, validation_split=0.1)
loss, acc = lstm_model.evaluate(X_test, y_test)
print("LSTM Accuracy:", acc)
'''