-
-
Notifications
You must be signed in to change notification settings - Fork 1.5k
Description
!pip install pandas numpy scikit-learn xgboost seaborn matplotlib tensorflow -q
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc
import xgboost as xgb
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os
import random
import warnings
warnings.filterwarnings('ignore')
# ===================== 1. Tabular part (UCI) =====================
# Load the concrete compressive-strength workbook straight from the UCI archive.
# NOTE(review): pandas needs the optional `xlrd` engine to read legacy .xls
# files, and the pip line at the top of this file does not install it —
# confirm it is available in the runtime.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"
df = pd.read_excel(url)

# Rename the nine columns positionally to short snake_case names.
df.columns = [
    'cement', 'slag', 'fly_ash', 'water', 'superplasticizer',
    'coarse_aggregate', 'fine_aggregate', 'age', 'strength',
]

# Derived water-to-cement ratio; used by label_crack() and as a model feature.
df['w_c_ratio'] = df['water'] / df['cement']
print(f"عدد العينات الكلي (UCI): {len(df)}")

# Print a reproducible 500-row random sample and its summary statistics.
print("\nعينة عشوائية من 500 صف:")
df_sample = df.sample(n=500, random_state=42)
print(df_sample.head(10))
print("\nإحصائيات العينة العشوائية (500 صف):")
print(df_sample.describe().round(2))
def label_crack(row):
    """Return a rule-based crack label for one mix-design record.

    A record is labelled 0 (no crack) only when all four conditions hold:
    ``w_c_ratio <= 0.45``, ``age >= 14``, ``strength >= 35`` and
    ``superplasticizer > 3``. Every other record is labelled 1 (crack).

    Args:
        row: Mapping-like record (for example a DataFrame row) providing the
            keys 'w_c_ratio', 'age', 'strength' and 'superplasticizer'.

    Returns:
        int: 0 when every condition is met, otherwise 1.
    """
    if (row['w_c_ratio'] <= 0.45 and
            row['age'] >= 14 and
            row['strength'] >= 35 and
            row['superplasticizer'] > 3):
        return 0
    return 1
# Label every row with the rule implemented in label_crack().
df['label'] = df.apply(label_crack, axis=1)

# Label noise: each row is drawn independently with probability 0.1 and the
# selected rows have their label inverted (0 <-> 1). The global NumPy seed is
# fixed first so the same rows are selected on every run.
np.random.seed(42)
flip_mask = np.random.choice([True, False], p=[0.1, 0.9], size=len(df))
flipped_labels = 1 - df.loc[flip_mask, 'label']
df.loc[flip_mask, 'label'] = flipped_labels

# Report the class balance in percent after the noise was applied.
print("\nتوزيع الفئات (جدولي):")
class_share_pct = df['label'].value_counts(normalize=True) * 100
print(class_share_pct)
# Model inputs: the eight mix-design columns plus the derived ratio.
# 'strength' is deliberately not listed here.
features = [
    'cement', 'slag', 'fly_ash', 'water', 'superplasticizer',
    'coarse_aggregate', 'fine_aggregate', 'age', 'w_c_ratio',
]
X_tab = df[features]
y = df['label']

# Stratified 80/20 split so both parts keep the same class balance.
tab_split = train_test_split(X_tab, y, test_size=0.2, random_state=42, stratify=y)
X_train_tab, X_test_tab, y_train, y_test = tab_split

# Standardise using statistics learned on the training part only.
scaler = StandardScaler()
scaler.fit(X_train_tab)
X_train_tab_scaled = scaler.transform(X_train_tab)
X_test_tab_scaled = scaler.transform(X_test_tab)
# Gradient-boosted classifier for the tabular branch.
tab_params = {
    'objective': 'binary:logistic',
    'eval_metric': 'logloss',
    'n_estimators': 600,
    'learning_rate': 0.05,
    'max_depth': 6,
    'subsample': 0.9,
    'colsample_bytree': 0.9,
    'reg_lambda': 1.0,
    'random_state': 42,
}
model_tab = xgb.XGBClassifier(**tab_params)
model_tab.fit(X_train_tab_scaled, y_train)

# Column 1 of predict_proba is the probability of class 1 (crack);
# predictions use a 0.5 decision threshold.
y_prob_tab = model_tab.predict_proba(X_test_tab_scaled)[:, 1]
y_pred_tab = (y_prob_tab >= 0.5).astype(int)
acc_tab = accuracy_score(y_test, y_pred_tab)

# Report the tabular-only metrics.
print(f"\nنتائج الجزء الجدولي فقط:")
print(f"Accuracy: {acc_tab*100:.2f}%")
tab_report = classification_report(y_test, y_pred_tab)
print("\nClassification Report:\n", tab_report)
tab_cm = confusion_matrix(y_test, y_pred_tab)
print("\nConfusion Matrix:\n", tab_cm)
رسم Confusion Matrix
plt.figure(figsize=(6,5))
sns.heatmap(confusion_matrix(y_test, y_pred_tab), annot=True, fmt='d', cmap='Blues',
xticklabels=['No-Crack', 'Crack'], yticklabels=['No-Crack', 'Crack'])
plt.title('Confusion Matrix - Tabular-only Model')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()
# ===================== 2. Vision part (CCIC + EfficientNetB0) =====================
# Assumes the image folder has already been uploaded (change the path if needed).
base_dir = "/content/Concrete Crack Images for Classification"
crack_dir = os.path.join(base_dir, "Positive")      # images labelled 1 (crack)
no_crack_dir = os.path.join(base_dir, "Negative")   # images labelled 0 (no crack)

# Sanity check: report how many entries each class folder contains.
print("\nتأكيد الصور:")
print("Crack:", len(os.listdir(crack_dir)), "صورة")
print("No-Crack:", len(os.listdir(no_crack_dir)), "صورة")
# Select 1000 images (500 per class, as requested).
random.seed(42)
# os.listdir() returns entries in arbitrary order, so sort before sampling;
# otherwise the fixed seed does not reproduce the same selection everywhere.
selected_crack = random.sample(sorted(os.listdir(crack_dir)), 500)
selected_no_crack = random.sample(sorted(os.listdir(no_crack_dir)), 500)

# Parenthesised so the concatenation can span two lines.
all_paths = (
    [os.path.join(crack_dir, img) for img in selected_crack]
    + [os.path.join(no_crack_dir, img) for img in selected_no_crack]
)
# Labels follow the order of all_paths: crack images (1) first, then no-crack (0).
labels_vision = [1] * len(selected_crack) + [0] * len(selected_no_crack)
print(f"\nعدد الصور المختارة (بصري): {len(all_paths)}")
# ImageNet-pretrained EfficientNetB0 without its classification head; with
# pooling='avg' it outputs one pooled feature vector per image. The weights are
# frozen because the network is only used as a feature extractor.
base_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_shape=(224, 224, 3),
)
base_model.trainable = False
def extract_embeddings(paths, batch_size=32):
    """Return one EfficientNetB0 embedding per image path.

    Images are loaded at 224x224, preprocessed with the EfficientNet
    ``preprocess_input`` and pushed through the module-level ``base_model``
    in batches instead of one ``predict`` call per image.

    Args:
        paths: Iterable of image file paths.
        batch_size: Number of images sent to the model per ``predict`` call.

    Returns:
        np.ndarray: Embeddings stacked in the order of ``paths``; an empty
        array when ``paths`` is empty.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    paths = list(paths)  # allow any iterable while still supporting slicing
    embeddings = []
    for start in range(0, len(paths), batch_size):
        batch = np.stack([
            img_to_array(load_img(path, target_size=(224, 224)))
            for path in paths[start:start + batch_size]
        ])
        batch = preprocess_input(batch)
        # predict() returns one row per image; extend() keeps them in order.
        embeddings.extend(base_model.predict(batch, batch_size=batch_size, verbose=0))
    return np.array(embeddings)
print("\nاستخراج الميزات البصرية...")
vision_embeddings = extract_embeddings(all_paths)

# Train a simple vision model (XGBoost on the embeddings).
# Stratified 80/20 split of the image embeddings and their labels.
X_train_vis, X_test_vis, y_train_vis, y_test_vis = train_test_split(
    vision_embeddings, labels_vision, test_size=0.2, random_state=42, stratify=labels_vision)
model_vis = xgb.XGBClassifier(
    n_estimators=300,  # slightly fewer trees for the image branch
    max_depth=4,
    learning_rate=0.05,
    random_state=42
)
model_vis.fit(X_train_vis, y_train_vis)

# Probability of class 1 (crack), thresholded at 0.5.
y_prob_vis = model_vis.predict_proba(X_test_vis)[:, 1]
y_pred_vis = (y_prob_vis >= 0.5).astype(int)
acc_vis = accuracy_score(y_test_vis, y_pred_vis)
print(f"\nنتائج الجزء البصري فقط:")
print(f"Accuracy: {acc_vis*100:.2f}%")
# ===================== 3. Late Fusion =====================
# Fuse the test-set probabilities of the tabular and the vision model.
#
# The two test sets come from independent splits of two unrelated datasets, so
# y_prob_tab and y_prob_vis are neither the same length nor row-aligned; adding
# them element-wise either fails to broadcast or mixes unrelated samples.
# Each tabular test row is therefore paired with a randomly drawn vision test
# sample that carries the same label, which keeps every hybrid result aligned
# with y_test.
# NOTE(review): this pairing is synthetic (no real sample has both a mix design
# and an image), so the hybrid metrics are an optimistic estimate — confirm that
# this is the intended evaluation protocol.
alpha = 0.4  # weight of the tabular branch (changed from 0.35 to 0.4 as requested)

tab_labels = np.asarray(y_test)
vis_labels = np.asarray(y_test_vis)
vis_probs = np.asarray(y_prob_vis)
pair_rng = np.random.default_rng(42)  # local generator: keeps the pairing reproducible
paired_prob_vis = np.empty(len(tab_labels), dtype=float)
for cls in np.unique(tab_labels):
    tab_idx = np.flatnonzero(tab_labels == cls)
    vis_idx = np.flatnonzero(vis_labels == cls)
    if vis_idx.size == 0:
        raise ValueError(f"No vision test sample with label {cls} available for pairing")
    # Sample with replacement: the tabular class may be larger than the vision class.
    paired_prob_vis[tab_idx] = vis_probs[pair_rng.choice(vis_idx, size=tab_idx.size, replace=True)]

hybrid_probs = alpha * y_prob_tab + (1 - alpha) * paired_prob_vis
hybrid_preds = (hybrid_probs >= 0.5).astype(int)
acc_hybrid = accuracy_score(y_test, hybrid_preds)
print(f"\nنتائج Hybrid (Late Fusion):")
print(f"Accuracy: {acc_hybrid*100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, hybrid_preds))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, hybrid_preds))
رسم Confusion Matrix للـ Hybrid
plt.figure(figsize=(6,5))
sns.heatmap(confusion_matrix(y_test, hybrid_preds), annot=True, fmt='d', cmap='Blues',
xticklabels=['No-Crack', 'Crack'], yticklabels=['No-Crack', 'Crack'])
plt.title('Confusion Matrix - Hybrid Model (Late Fusion)')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()
# ===================== 4. Additional plots =====================
# ROC curve for the hybrid model.
fpr, tpr, _ = roc_curve(y_test, hybrid_probs)
roc_auc = auc(fpr, tpr)
plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Diagonal reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - Hybrid Model (Late Fusion)')
plt.legend(loc="lower right")
plt.grid(True)
plt.show()
# Bar chart comparing the accuracy of the three models.
models = ['Tabular-only', 'Vision-only', 'Hybrid (late fusion)']
accuracies = [acc_tab, acc_vis, acc_hybrid]
plt.figure(figsize=(8, 5))
sns.barplot(x=models, y=accuracies, palette='viridis')
plt.ylim(0, 1)
plt.title('Comparison of Accuracy Between Models')
plt.ylabel('Accuracy')
plt.xlabel('Model')
# Annotate each bar with its accuracy value, formatted to two decimals.
plt.bar_label(plt.gca().containers[0], fmt='%.2f')
plt.grid(True, axis='y')
plt.show()
print("\nالكود الكامل مع النتايج مدمجة - جاهز للـ GitHub أو Supplementary Material")