diff --git a/webapp/Dockerfile b/webapp/Dockerfile
index 21d1907..94aaa83 100644
--- a/webapp/Dockerfile
+++ b/webapp/Dockerfile
@@ -1,4 +1,5 @@
-FROM python:3.7
+# Stage 1: build stage (system build tooling and Python dependencies)
+FROM python:3.12-slim AS build
# Create the required folders
RUN mkdir -p /webapp/models
@@ -6,32 +7,63 @@ RUN mkdir -p /webapp/models
# Copy everything
COPY . /webapp
+# Build-stage system packages (git, compiler toolchain, sqlite), listed alphabetically
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    apt-utils \
+    build-essential \
+    cron \
+    git \
+    libsqlite3-dev \
+    sqlite3
+
+# Python dependencies; USE_CPU_TORCH=true adds the CPU-only PyTorch wheel index to lower image size (off by default)
+ARG USE_CPU_TORCH=false
+RUN pip install -U pip && \
+    if [ "$USE_CPU_TORCH" != "true" ]; then \
+        pip install -r /webapp/requirements.txt; \
+    else \
+        pip install -r /webapp/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/; \
+    fi
+
+# Download the spaCy en_core_web_md model in the build stage; presumably it lands in site-packages, which the final stage copies (confirm)
+RUN python -m spacy download en_core_web_md
+
+# Stage 2: final (production) image
+FROM python:3.12-slim AS final
+
+# Runtime system packages only (no git/compiler). autoremove needs -y in a non-interactive build; apt lists are removed in the same layer.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    cron \
+    libsqlite3-dev \
+    sqlite3 && apt-get autoremove -y && rm -rf /var/lib/apt/lists/*
+
+# Make sure the model and data directories exist in the final image
+RUN mkdir -p /webapp/models /medcat_data
+
+# Copy the application tree (/webapp) from the build stage
+COPY --from=build /webapp /webapp
+# COPY --from=build /root/.cache /root/.cache # Copy pip cache if needed
+
+# Copy pip-installed site-packages from the build stage. NOTE(review): /usr/local/bin (console scripts) is not copied here; confirm none are needed at runtime.
+COPY --from=build /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+
+# Set environment variables
ENV VOCAB_URL=https://medcat.rosalind.kcl.ac.uk/media/vocab.dat
ENV CDB_URL=https://medcat.rosalind.kcl.ac.uk/media/cdb-medmen-v1.dat
ENV CDB_PATH=/webapp/models/cdb.dat
ENV VOCAB_PATH=/webapp/models/vocab.dat
-# Create the data directory
-RUN mkdir -p /medcat_data
-
# Set the pythonpath
WORKDIR /webapp
-RUN pip install -r requirements.txt
-
-# Get the spacy model
-RUN python -m spacy download en_core_web_md
+# Install the db backup cron job: copy the cron file, set its mode to 0644, and load it with crontab
+COPY etc/cron.d/db-backup-cron /etc/cron.d/db-backup-cron
+RUN chmod 0644 /etc/cron.d/db-backup-cron && crontab /etc/cron.d/db-backup-cron
-# Build the db
+# Run migrations and collect static (could be in entrypoint script)
RUN python manage.py makemigrations && \
python manage.py makemigrations demo && \
python manage.py migrate && \
python manage.py migrate demo && \
python manage.py collectstatic --noinput
-
-# Create the db backup cron job
-RUN apt-get update && apt-get install -y --no-install-recommends apt-utils cron sqlite3 libsqlite3-dev
-COPY etc/cron.d/db-backup-cron /etc/cron.d/db-backup-cron
-RUN chmod 0644 /etc/cron.d/db-backup-cron
-RUN crontab /etc/cron.d/db-backup-cron
diff --git a/webapp/demo/forms.py b/webapp/demo/forms.py
index 100efc4..cbf7401 100644
--- a/webapp/demo/forms.py
+++ b/webapp/demo/forms.py
@@ -46,3 +46,8 @@ class Meta:
"funder": forms.TextInput(attrs={"size": 40}),
"use_case": forms.Textarea(attrs={"rows": 5, "cols": 40}),
}
+
+
+class UMLSApiKeyForm(forms.Form):
+    """Single-field form that collects the user's UMLS API key."""
+    apikey = forms.CharField(widget=forms.TextInput(attrs={'size': 50}), label='UMLS API Key')
diff --git a/webapp/demo/templates/umls_api_key_entry.html b/webapp/demo/templates/umls_api_key_entry.html
new file mode 100644
index 0000000..867790c
--- /dev/null
+++ b/webapp/demo/templates/umls_api_key_entry.html
@@ -0,0 +1,49 @@
+{% extends 'base.html' %}
+{% load static %}
+
+{% block style %}
+
+
+
+
+{% endblock %}
+
+{% block body %}
+
+
Please enter your UMLS API key to verify your license:
+
+ {% if message %}
+
+ {{ message }}
+
+ {% endif %}
+
+ {% if form.errors %}
+
+
+ {% for field in form %}
+ {% for error in field.errors %}
+ - {{ field.label }}: {{ error }}
+ {% endfor %}
+ {% endfor %}
+ {% for error in form.non_field_errors %}
+ - {{ error }}
+ {% endfor %}
+
+
+ {% endif %}
+
+
+
+
+ You can find your API key by logging into your UMLS account and visiting your UMLS Profile.
+
+
+{% endblock %}
diff --git a/webapp/demo/urls.py b/webapp/demo/urls.py
index 8919757..a884378 100644
--- a/webapp/demo/urls.py
+++ b/webapp/demo/urls.py
@@ -5,5 +5,6 @@
urlpatterns = [
path('', show_annotations, name='train_annotations'),
path('auth-callback', validate_umls_user, name='validate-umls-user'),
+ path('auth-callback-api', validate_umls_api_key, name='validate-umls-api-key'),
path('download-model', download_model, name="download-model")
]
diff --git a/webapp/demo/views.py b/webapp/demo/views.py
index 58d6bf6..21eb750 100644
--- a/webapp/demo/views.py
+++ b/webapp/demo/views.py
@@ -2,6 +2,7 @@
sys.path.insert(0, '/home/ubuntu/projects/MedCAT/')
import os
import json
+import requests
from django.shortcuts import render
from django.http import StreamingHttpResponse, HttpResponse
from wsgiref.util import FileWrapper
@@ -13,12 +14,17 @@
from urllib.error import HTTPError
#from medcat.meta_cat import MetaCAT
from .models import *
-from .forms import DownloaderForm
+from .forms import DownloaderForm, UMLSApiKeyForm
AUTH_CALLBACK_SERVICE = 'https://medcat.rosalind.kcl.ac.uk/auth-callback'
VALIDATION_BASE_URL = 'https://uts-ws.nlm.nih.gov/rest/isValidServiceValidate'
VALIDATION_LOGIN_URL = f'https://uts.nlm.nih.gov/uts/login?service={AUTH_CALLBACK_SERVICE}'
+API_KEY_AUTH_URL = 'https://utslogin.nlm.nih.gov/cas/v1/api-key'  # POSTed with the API key; a 201 response carries the TGT URL in its Location header
+UMLS_SERVICE = 'http://umlsks.nlm.nih.gov' # required as 'service' parameter
+TEST_CUI = 'C0000005' # harmless, public CUI for validation
+CONTENT_API_URL = f'https://uts-ws.nlm.nih.gov/rest/content/current/CUI/{TEST_CUI}'  # fetched with the service ticket as the final license check
+
model_pack_path = os.getenv('MODEL_PACK_PATH', 'models/medmen_wstatus_2021_oct.zip')
try:
@@ -101,6 +107,76 @@ def validate_umls_user(request):
return render(request, 'umls_user_validation.html', context=context)
+def _umls_ticket_check(apikey):
+    """Run the UMLS ticket flow for ``apikey`` against the UTS endpoints.
+
+    Returns a ``(licensed, error)`` tuple. ``error`` is a user-facing string
+    when one of the two auth steps fails, otherwise ``None``; ``licensed`` is
+    True only when the final content request answers 200.
+    """
+    # Step 1: Get TGT
+    r = requests.post(API_KEY_AUTH_URL, data={'apikey': apikey}, timeout=10)
+    if r.status_code != 201:
+        return False, 'Invalid API key or auth server issue.'
+
+    # A 201 without a Location header would otherwise surface as a bare KeyError.
+    tgt_url = r.headers.get('Location')
+    if not tgt_url:
+        return False, 'Auth server did not return a ticket-granting URL.'
+
+    # Step 2: Get service ticket
+    r = requests.post(tgt_url, data={'service': UMLS_SERVICE}, timeout=10)
+    if r.status_code != 200:
+        return False, 'Could not get service ticket.'
+
+    # Step 3: Use ticket to call a known endpoint
+    r = requests.get(CONTENT_API_URL, params={'ticket': r.text.strip()}, timeout=10)
+    return r.status_code == 200, None
+
+
+def validate_umls_api_key(request):
+    """Verify a UMLS license from a user-supplied API key.
+
+    GET (or an invalid POST) renders the key-entry form; a valid POST runs the
+    ticket flow and renders the validation result page.
+    """
+    if request.method == 'POST':
+        form = UMLSApiKeyForm(request.POST)
+        if form.is_valid():
+            apikey = form.cleaned_data['apikey']
+            try:
+                licensed, error = _umls_ticket_check(apikey)
+                if error is not None:
+                    context = {
+                        'is_valid': False,
+                        'message': f'Error validating API key: {error}'
+                    }
+                elif licensed:
+                    context = {
+                        'is_valid': True,
+                        'message': 'License verified via API key!',
+                        'downloader_form': DownloaderForm(MedcatModel.objects.all())
+                    }
+                else:
+                    context = {
+                        'is_valid': False,
+                        'message': 'API key is not valid or user is not licensed for UMLS.'
+                    }
+            except Exception:
+                # Network/unexpected failures: show a fixed message instead of
+                # str(e), which can expose internal details such as the TGT URL.
+                context = {
+                    'is_valid': False,
+                    'message': 'Error validating API key: could not complete the UMLS verification request.'
+                }
+
+            return render(request, 'umls_user_validation.html', context=context)
+    else:
+        form = UMLSApiKeyForm()
+
+    return render(request, 'umls_api_key_entry.html', {'form': form})
+
+
def download_model(request):
if request.method == 'POST':
downloader_form = DownloaderForm(MedcatModel.objects.all(), request.POST)
diff --git a/webapp/requirements.txt b/webapp/requirements.txt
index da15da6..80aaedc 100644
--- a/webapp/requirements.txt
+++ b/webapp/requirements.txt
@@ -2,5 +2,5 @@ Django==3.2.25
django-dbbackup==4.0.0b0
django-storages[boto3]==1.12.3
django-cron==0.5.1
-medcat~=1.15.0
+medcat~=1.16.0
urllib3==1.26.18