Skip to content

THIS IS SPAM paper2025 #4057

@ahmedmmana

Description

@ahmedmmana

!pip install pandas numpy scikit-learn xgboost seaborn matplotlib tensorflow -q

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc
import xgboost as xgb
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import os
import random
import warnings
warnings.filterwarnings('ignore')

# ===================== 1. Tabular part (UCI) =====================

# Concrete compressive-strength workbook, read directly from the UCI archive.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"
df = pd.read_excel(url)

# Replace the workbook's headers positionally with short snake_case names.
df.columns = [
    'cement', 'slag', 'fly_ash', 'water', 'superplasticizer',
    'coarse_aggregate', 'fine_aggregate', 'age', 'strength',
]

# Engineered feature: water-to-cement ratio.
df['w_c_ratio'] = df['water'] / df['cement']

print(f"عدد العينات الكلي (UCI): {len(df)}")

# Preview a reproducible 500-row random sample; the full frame is what gets
# used further down, the sample is only printed.
print("\nعينة عشوائية من 500 صف:")
df_sample = df.sample(n=500, random_state=42)
print(df_sample.head(10))

print("\nإحصائيات العينة العشوائية (500 صف):")
print(df_sample.describe().round(2))

def label_crack(row):
    """Return a rule-based binary label for one concrete-mix record.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            'w_c_ratio', 'age', 'strength' and 'superplasticizer'.

    Returns:
        0 when the record satisfies all four thresholds (w/c ratio <= 0.45,
        age >= 14, strength >= 35, superplasticizer > 3), otherwise 1.
    """
    meets_all_thresholds = (
        row['w_c_ratio'] <= 0.45
        and row['age'] >= 14
        and row['strength'] >= 35
        and row['superplasticizer'] > 3
    )
    return 0 if meets_all_thresholds else 1

# Rule-based label for every row of the table.
df['label'] = df.apply(label_crack, axis=1)

# Label noise: each row is independently selected with probability 0.1 and
# its 0/1 label is inverted. The global NumPy seed keeps the mask repeatable.
np.random.seed(42)
flip_mask = np.random.choice([True, False], size=len(df), p=[0.1, 0.9])
df['label'] = df['label'].mask(flip_mask, 1 - df['label'])

# Class balance after flipping, in percent.
print("\nتوزيع الفئات (جدولي):")
print(df['label'].value_counts(normalize=True).mul(100))

# Model inputs: the mix quantities, age and the engineered ratio.
# 'strength' is deliberately absent from this list.
features = [
    'cement', 'slag', 'fly_ash', 'water', 'superplasticizer',
    'coarse_aggregate', 'fine_aggregate', 'age', 'w_c_ratio',
]

X_tab = df[features]
y = df['label']

# Stratified 80/20 hold-out split with a fixed seed.
X_train_tab, X_test_tab, y_train, y_test = train_test_split(
    X_tab,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Scaling statistics come from the training portion only and are then
# re-used unchanged on the test portion.
scaler = StandardScaler()
X_train_tab_scaled = scaler.fit(X_train_tab).transform(X_train_tab)
X_test_tab_scaled = scaler.transform(X_test_tab)

tab_params = dict(
    n_estimators=600,
    max_depth=6,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    reg_lambda=1.0,
    objective='binary:logistic',
    random_state=42,
    eval_metric='logloss',
)
model_tab = xgb.XGBClassifier(**tab_params)
model_tab.fit(X_train_tab_scaled, y_train)

# Probability of class 1, thresholded at 0.5 for the hard prediction.
y_prob_tab = model_tab.predict_proba(X_test_tab_scaled)[:, 1]
y_pred_tab = (y_prob_tab >= 0.5).astype(int)
acc_tab = accuracy_score(y_test, y_pred_tab)

print(f"\nنتائج الجزء الجدولي فقط:")
print(f"Accuracy: {acc_tab*100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, y_pred_tab))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_tab))

# Plot the confusion matrix of the tabular-only model.
plt.figure(figsize=(6, 5))
sns.heatmap(
    confusion_matrix(y_test, y_pred_tab),
    annot=True,
    fmt='d',  # integer counts in the cells
    cmap='Blues',
    xticklabels=['No-Crack', 'Crack'],
    yticklabels=['No-Crack', 'Crack'],
)
plt.title('Confusion Matrix - Tabular-only Model')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# ===================== 2. Vision part (CCIC + EfficientNetB0) =====================

# Assumes the dataset folder has already been uploaded (change the path if needed).
base_dir = "/content/Concrete Crack Images for Classification"

crack_dir = os.path.join(base_dir, "Positive")
no_crack_dir = os.path.join(base_dir, "Negative")


def _list_files(directory):
    """Return the sorted names of the regular, non-hidden files in *directory*.

    os.listdir() returns entries in arbitrary order, so sorting is what makes
    the seeded random.sample() below pick the same files on every run.
    Hidden entries and sub-directories are skipped so they cannot be sampled
    as if they were images.
    """
    return sorted(
        name for name in os.listdir(directory)
        if not name.startswith('.')
        and os.path.isfile(os.path.join(directory, name))
    )


crack_files = _list_files(crack_dir)
no_crack_files = _list_files(no_crack_dir)

print("\nتأكيد الصور:")
print("Crack:", len(crack_files), "صورة")
print("No-Crack:", len(no_crack_files), "صورة")

# Pick 1000 images (500 per class). random.sample raises ValueError if a
# class folder holds fewer files than requested.
n_per_class = 500
random.seed(42)
selected_crack = random.sample(crack_files, n_per_class)
selected_no_crack = random.sample(no_crack_files, n_per_class)

# Parenthesised so the concatenation can span two lines.
all_paths = (
    [os.path.join(crack_dir, img) for img in selected_crack]
    + [os.path.join(no_crack_dir, img) for img in selected_no_crack]
)
# 1 = image from the "Positive" folder, 0 = image from the "Negative" folder.
labels_vision = [1] * len(selected_crack) + [0] * len(selected_no_crack)

print(f"\nعدد الصور المختارة (بصري): {len(all_paths)}")

# ImageNet-pretrained EfficientNetB0 without its classification head; global
# average pooling is applied to the last convolutional output. The weights
# are frozen, so the network is not updated anywhere in this script.
base_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
    pooling='avg',
)
base_model.trainable = False

def extract_embeddings(paths, batch_size=32):
    """Return one `base_model` embedding per image path.

    Images are loaded at 224x224, stacked into chunks of at most
    `batch_size`, and sent through `base_model.predict` one chunk at a time.
    Calling `predict` once per image (as before) pays the per-call overhead
    for every file; chunking keeps that overhead low without holding every
    decoded image in memory at once.

    Args:
        paths: Iterable of image file paths.
        batch_size: Number of images per `predict` call; must be >= 1.

    Returns:
        Array with one row per path, in the same order as `paths`. For an
        empty `paths` the result has zero rows and `base_model`'s output width.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    paths = list(paths)
    if not paths:
        return np.empty((0, base_model.output_shape[-1]), dtype=np.float32)

    chunks = []
    for start in range(0, len(paths), batch_size):
        batch = np.stack([
            img_to_array(load_img(path, target_size=(224, 224)))
            for path in paths[start:start + batch_size]
        ])
        batch = preprocess_input(batch)
        chunks.append(
            base_model.predict(batch, batch_size=batch_size, verbose=0)
        )
    return np.concatenate(chunks, axis=0)

print("\nاستخراج الميزات البصرية...")
vision_embeddings = extract_embeddings(all_paths)

# Train a simple vision classifier (XGBoost on the image embeddings).
X_train_vis, X_test_vis, y_train_vis, y_test_vis = train_test_split(
    vision_embeddings,
    labels_vision,
    test_size=0.2,
    random_state=42,
    stratify=labels_vision,
)

model_vis = xgb.XGBClassifier(
    n_estimators=300,  # a bit fewer for the images
    max_depth=4,
    learning_rate=0.05,
    random_state=42,
)

model_vis.fit(X_train_vis, y_train_vis)

# Probability of class 1, thresholded at 0.5 for the hard prediction.
y_prob_vis = model_vis.predict_proba(X_test_vis)[:, 1]
y_pred_vis = (y_prob_vis >= 0.5).astype(int)
acc_vis = accuracy_score(y_test_vis, y_pred_vis)

print(f"\nنتائج الجزء البصري فقط:")
print(f"Accuracy: {acc_vis*100:.2f}%")

# ===================== 3. Late Fusion =====================

# Blend the test-set probabilities of the tabular and the vision model.
alpha = 0.4  # weight of the tabular model (changed from 0.35 to 0.4)

# The blend below is element-wise, so entry i of y_prob_tab and entry i of
# y_prob_vis must describe the same specimen. They are produced by two
# independent train_test_split calls (one over the rows of df, one over the
# 1000 selected images), so nothing in this script guarantees that.
# Fail loudly instead of producing a broadcasting error or, worse, metrics
# computed on unrelated pairs.
# NOTE(review): a meaningful fusion needs a dataset in which every sample
# has both a tabular record and an image — confirm how pairs are meant to
# be formed before trusting any hybrid numbers.
prob_tab = np.asarray(y_prob_tab)
prob_vis = np.asarray(y_prob_vis)
if prob_tab.shape != prob_vis.shape:
    raise ValueError(
        "Late fusion needs one tabular and one vision probability per test "
        f"sample, but got shapes {prob_tab.shape} and {prob_vis.shape}; "
        "the two test sets are not paired."
    )
if not np.array_equal(np.asarray(y_test), np.asarray(y_test_vis)):
    raise ValueError(
        "Tabular and vision test labels differ, so the two test sets do not "
        "describe the same samples and their probabilities cannot be fused."
    )

hybrid_probs = alpha * prob_tab + (1 - alpha) * prob_vis
hybrid_preds = (hybrid_probs >= 0.5).astype(int)

acc_hybrid = accuracy_score(y_test, hybrid_preds)

print(f"\nنتائج Hybrid (Late Fusion):")
print(f"Accuracy: {acc_hybrid*100:.2f}%")
print("\nClassification Report:\n", classification_report(y_test, hybrid_preds))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, hybrid_preds))

# Plot the confusion matrix of the hybrid (late-fusion) predictions.
plt.figure(figsize=(6, 5))
sns.heatmap(
    confusion_matrix(y_test, hybrid_preds),
    annot=True,
    fmt='d',  # integer counts in the cells
    cmap='Blues',
    xticklabels=['No-Crack', 'Crack'],
    yticklabels=['No-Crack', 'Crack'],
)
plt.title('Confusion Matrix - Hybrid Model (Late Fusion)')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# ===================== 4. Additional plots =====================

# ROC curve for the hybrid model, built from the fused probabilities.
fpr, tpr, _ = roc_curve(y_test, hybrid_probs)
roc_auc = auc(fpr, tpr)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
# Dashed diagonal as a visual reference line.
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - Hybrid Model (Late Fusion)')
plt.legend(loc="lower right")
plt.grid(True)
plt.show()

# Bar chart comparing the accuracy of the three models.
models = ['Tabular-only', 'Vision-only', 'Hybrid (late fusion)']
accuracies = [acc_tab, acc_vis, acc_hybrid]

plt.figure(figsize=(8, 5))
ax = sns.barplot(x=models, y=accuracies, palette='viridis')
plt.ylim(0, 1)
plt.title('Comparison of Accuracy Between Models')
plt.ylabel('Accuracy')
plt.xlabel('Model')
# Label every bar container rather than only containers[0].
# NOTE(review): newer seaborn releases appear to create one container per
# bar when `palette` is given without `hue`, in which case containers[0]
# would annotate just the first bar — confirm against the installed version.
for container in ax.containers:
    ax.bar_label(container, fmt='%.2f')
plt.grid(True, axis='y')
plt.show()

print("\nالكود الكامل مع النتايج مدمجة - جاهز للـ GitHub أو Supplementary Material")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions