diff --git a/webapp/Dockerfile b/webapp/Dockerfile
index 21d1907..94aaa83 100644
--- a/webapp/Dockerfile
+++ b/webapp/Dockerfile
@@ -1,4 +1,5 @@
-FROM python:3.7
+# Stage 1: Build stage (dependencies and compilation)
+FROM python:3.12-slim AS build
 
 # Create the required folders
 RUN mkdir -p /webapp/models
@@ -6,32 +7,63 @@ RUN mkdir -p /webapp/models
 # Copy everything
 COPY . /webapp
 
+# Install dependencies for building (git, etc.)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
+    build-essential \
+    apt-utils \
+    cron \
+    sqlite3 \
+    libsqlite3-dev
+
+# Install Python dependencies
+ARG USE_CPU_TORCH=false
+# NOTE: Allow building without GPU so as to lower image size (disabled by default)
+RUN pip install -U pip && if [ "$USE_CPU_TORCH" = "true" ]; then \
+    pip install -r /webapp/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu/; \
+    else \
+    pip install -r /webapp/requirements.txt; \
+    fi
+
+# Get the spacy model (for later copy)
+RUN python -m spacy download en_core_web_md
+
+# Stage 2: Final (production) image
+FROM python:3.12-slim AS final
+
+# Install runtime dependencies only (git and compilers stay in the build stage); -y keeps autoremove non-interactive
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    cron \
+    sqlite3 \
+    libsqlite3-dev && apt-get autoremove -y && rm -rf /var/lib/apt/lists/*
+
+# Create the required folders (if not created already)
+RUN mkdir -p /webapp/models && mkdir -p /medcat_data
+
+# Copy only necessary files from build stage
+COPY --from=build /webapp /webapp
+# COPY --from=build /root/.cache /root/.cache  # Copy pip cache if needed
+
+# Copy Python site-packages (installed by pip) from build stage
+COPY --from=build /usr/local/lib/python3.12/site-packages /usr/local/lib/python3.12/site-packages
+
+# Set environment variables
 ENV VOCAB_URL=https://medcat.rosalind.kcl.ac.uk/media/vocab.dat
 ENV CDB_URL=https://medcat.rosalind.kcl.ac.uk/media/cdb-medmen-v1.dat
 
 ENV CDB_PATH=/webapp/models/cdb.dat
 ENV VOCAB_PATH=/webapp/models/vocab.dat
 
-# Create the data directory
-RUN mkdir -p /medcat_data
-
 # Set the pythonpath
 WORKDIR /webapp
 
-RUN pip install -r requirements.txt
-
-# Get the spacy model
-RUN python -m spacy download en_core_web_md
+# Create the db backup cron job (copied from your setup)
+COPY etc/cron.d/db-backup-cron /etc/cron.d/db-backup-cron
+RUN chmod 0644 /etc/cron.d/db-backup-cron && crontab /etc/cron.d/db-backup-cron
 
-# Build the db
+# Run migrations and collect static (could be in entrypoint script)
 RUN python manage.py makemigrations && \
     python manage.py makemigrations demo && \
     python manage.py migrate && \
     python manage.py migrate demo && \
     python manage.py collectstatic --noinput
-
-# Create the db backup cron job
-RUN apt-get update && apt-get install -y --no-install-recommends apt-utils cron sqlite3 libsqlite3-dev
-COPY etc/cron.d/db-backup-cron /etc/cron.d/db-backup-cron
-RUN chmod 0644 /etc/cron.d/db-backup-cron
-RUN crontab /etc/cron.d/db-backup-cron
diff --git a/webapp/demo/forms.py b/webapp/demo/forms.py
index 100efc4..cbf7401 100644
--- a/webapp/demo/forms.py
+++ b/webapp/demo/forms.py
@@ -46,3 +46,8 @@ class Meta:
             "funder": forms.TextInput(attrs={"size": 40}),
             "use_case": forms.Textarea(attrs={"rows": 5, "cols": 40}),
         }
+
+
+class UMLSApiKeyForm(forms.Form):
+    apikey = forms.CharField(label='UMLS API Key',
+                             widget=forms.TextInput(attrs={'size': 50}))
diff --git a/webapp/demo/templates/umls_api_key_entry.html b/webapp/demo/templates/umls_api_key_entry.html
new file mode 100644
index 0000000..867790c
--- /dev/null
+++ b/webapp/demo/templates/umls_api_key_entry.html
@@ -0,0 +1,49 @@
+{% extends 'base.html' %}
+{% load static %}
+
+{% block style %}
+
+
+
+
+{% endblock %}
+
+{% block body %}
+
+
Please enter your UMLS API key to verify your license:
+ + {% if message %} +
+ {{ message }} +
+ {% endif %} + + {% if form.errors %} +
+ +
+ {% endif %} + +
+ {% csrf_token %} +
+ {{ form.apikey.label_tag }} + {{ form.apikey }} +
+ +
+ +

+ You can find your API key by logging into your UMLS account and visiting your UMLS Profile. +

+
+{% endblock %}
diff --git a/webapp/demo/urls.py b/webapp/demo/urls.py
index 8919757..a884378 100644
--- a/webapp/demo/urls.py
+++ b/webapp/demo/urls.py
@@ -5,5 +5,6 @@
 urlpatterns = [
     path('', show_annotations, name='train_annotations'),
     path('auth-callback', validate_umls_user, name='validate-umls-user'),
+    path('auth-callback-api', validate_umls_api_key, name='validate-umls-api-key'),
     path('download-model', download_model, name="download-model")
 ]
diff --git a/webapp/demo/views.py b/webapp/demo/views.py
index 58d6bf6..21eb750 100644
--- a/webapp/demo/views.py
+++ b/webapp/demo/views.py
@@ -2,6 +2,7 @@
 sys.path.insert(0, '/home/ubuntu/projects/MedCAT/')
 import os
 import json
+import requests
 from django.shortcuts import render
 from django.http import StreamingHttpResponse, HttpResponse
 from wsgiref.util import FileWrapper
@@ -13,12 +14,17 @@
 from urllib.error import HTTPError
 #from medcat.meta_cat import MetaCAT
 from .models import *
-from .forms import DownloaderForm
+from .forms import DownloaderForm, UMLSApiKeyForm
 
 AUTH_CALLBACK_SERVICE = 'https://medcat.rosalind.kcl.ac.uk/auth-callback'
 VALIDATION_BASE_URL = 'https://uts-ws.nlm.nih.gov/rest/isValidServiceValidate'
 VALIDATION_LOGIN_URL = f'https://uts.nlm.nih.gov/uts/login?service={AUTH_CALLBACK_SERVICE}'
 
+API_KEY_AUTH_URL = 'https://utslogin.nlm.nih.gov/cas/v1/api-key'
+UMLS_SERVICE = 'http://umlsks.nlm.nih.gov'  # required as 'service' parameter
+TEST_CUI = 'C0000005'  # harmless, public CUI for validation
+CONTENT_API_URL = f'https://uts-ws.nlm.nih.gov/rest/content/current/CUI/{TEST_CUI}'
+
 model_pack_path = os.getenv('MODEL_PACK_PATH', 'models/medmen_wstatus_2021_oct.zip')
 
 try:
@@ -101,6 +107,73 @@ def validate_umls_user(request):
     return render(request, 'umls_user_validation.html', context=context)
 
 
+def validate_umls_api_key(request):
+    """Verify a UMLS licence from a user-supplied API key.
+
+    A non-POST request renders the empty key-entry form. A POST with a valid
+    form trades the key for a ticket-granting ticket, then a service ticket,
+    and uses that ticket to fetch TEST_CUI; the outcome is rendered with
+    'umls_user_validation.html'. A POST with an invalid form re-renders the
+    entry form with the bound form so its errors can be shown.
+    """
+    if request.method == 'POST':
+        form = UMLSApiKeyForm(request.POST)
+        if form.is_valid():
+            apikey = form.cleaned_data['apikey']
+            try:
+                # Step 1: Get TGT
+                r = requests.post(API_KEY_AUTH_URL, data={'apikey': apikey}, timeout=10)
+                if r.status_code != 201:
+                    raise Exception('Invalid API key or auth server issue.')
+
+                # Without this check a missing header would surface to the user as "'Location'"
+                tgt_url = r.headers.get('Location')
+                if not tgt_url:
+                    raise Exception('Auth server did not return a ticket URL.')
+
+                # Step 2: Get service ticket
+                r = requests.post(tgt_url, data={'service': UMLS_SERVICE}, timeout=10)
+                if r.status_code != 200:
+                    raise Exception('Could not get service ticket.')
+
+                service_ticket = r.text.strip()
+
+                # Step 3: Use ticket to call a known endpoint
+                params = {'ticket': service_ticket}
+                r = requests.get(CONTENT_API_URL, params=params, timeout=10)
+
+                if r.status_code == 200:
+                    context = {
+                        'is_valid': True,
+                        'message': 'License verified via API key!',
+                        'downloader_form': DownloaderForm(MedcatModel.objects.all())
+                    }
+                else:
+                    context = {
+                        'is_valid': False,
+                        'message': 'API key is not valid or user is not licensed for UMLS.'
+                    }
+
+            except requests.RequestException:
+                # requests' error text can include the request URL (the TGT URL after step 1),
+                # so show a fixed message instead of str(e)
+                context = {
+                    'is_valid': False,
+                    'message': 'Error validating API key: could not reach the UMLS service.'
+                }
+            except Exception as e:
+                context = {
+                    'is_valid': False,
+                    'message': f'Error validating API key: {str(e)}'
+                }
+
+            return render(request, 'umls_user_validation.html', context=context)
+    else:
+        form = UMLSApiKeyForm()
+
+    return render(request, 'umls_api_key_entry.html', {'form': form})
+
+
 def download_model(request):
     if request.method == 'POST':
         downloader_form = DownloaderForm(MedcatModel.objects.all(), request.POST)
diff --git a/webapp/requirements.txt b/webapp/requirements.txt
index da15da6..80aaedc 100644
--- a/webapp/requirements.txt
+++ b/webapp/requirements.txt
@@ -2,5 +2,5 @@ Django==3.2.25
 django-dbbackup==4.0.0b0
 django-storages[boto3]==1.12.3
 django-cron==0.5.1
-medcat~=1.15.0
+medcat~=1.16.0
 urllib3==1.26.18