From 61c23818111190a9fe0ad68a5c5dff6297255d49 Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Thu, 20 May 2021 15:25:08 +0200 Subject: [PATCH 01/12] Use the view queryset in the models list template --- app/escriptorium/templates/core/models_list.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/escriptorium/templates/core/models_list.html b/app/escriptorium/templates/core/models_list.html index 809d1a30..a01584c1 100644 --- a/app/escriptorium/templates/core/models_list.html +++ b/app/escriptorium/templates/core/models_list.html @@ -11,7 +11,7 @@ <th>{% trans "Errors" %}</th> <th>{# buttons #}</th> </tr> - {% for model in document.ocr_models.all %} + {% for model in object_list %} <tr id="tr-{{model.pk}}" class="model-head" data-id="{{model.pk}}"> <td title="{% trans "Model name" %}">{{ model.name }}</td> <td title="{% trans "Model role" %}">{{ model.get_job_display }}</td> -- GitLab From 5259401213c53196bd77231a9842cf5b3280c771 Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Thu, 20 May 2021 16:33:53 +0200 Subject: [PATCH 02/12] Remove can_train condition to list models --- app/escriptorium/templates/core/document_nav.html | 2 -- app/escriptorium/templates/core/document_part_edit.html | 2 -- 2 files changed, 4 deletions(-) diff --git a/app/escriptorium/templates/core/document_nav.html b/app/escriptorium/templates/core/document_nav.html index a0dea118..5741aadc 100644 --- a/app/escriptorium/templates/core/document_nav.html +++ b/app/escriptorium/templates/core/document_nav.html @@ -9,11 +9,9 @@ <a href="{% if object %}{% url 'document-update' pk=document.pk %}{% endif %}" class="nav-item nav-link {% block nav-doc-active %}{% endblock %}" id="nav-doc-tab" role="tab" aria-controls="nav-doc" aria-selected="true">{% trans "Description" %}</a> <a href="{% if object %}{% url 'document-images' pk=document.pk %}{% else %}#{% endif %}" class="nav-item nav-link {% if not object %}disabled{% endif %} {% 
block nav-images-active %}{% endblock %}" id="nav-img-tab" role="tab" aria-controls="nav-img" aria-selected="true">{% trans "Images" %}</a> <a href="{% if document %}{% url 'document-part-edit' pk=document.pk %}{% else %}#{% endif %}" class="nav-item nav-link {% block nav-edit-active %}{% endblock %}{% if not object or not document.parts.count %}disabled{% endif %}" id="nav-edit-tab" role="tab" aria-controls="nav-doc" aria-selected="true">{% trans "Edit" %}</a> - {% if perms.core.can_train %} {% with models_count=document.ocr_models.count %} <a href="{% if document and models_count %}{% url 'document-models' document_pk=document.pk %}{% else %}#{% endif %}" class="nav-item nav-link {% if not document or not models_count %}disabled{% endif %}" id="nav-models-tab" role="tab" aria-controls="nav-doc" aria-selected="true">{% trans "Models" %}</a> {% endwith %} - {% endif %} {% block extra_nav %}<div class="nav-div nav-item ml-5 ">{{document.name}}</div>{% endblock %} </div> </nav> diff --git a/app/escriptorium/templates/core/document_part_edit.html b/app/escriptorium/templates/core/document_part_edit.html index 0cb1a712..0c6d8577 100644 --- a/app/escriptorium/templates/core/document_part_edit.html +++ b/app/escriptorium/templates/core/document_part_edit.html @@ -13,11 +13,9 @@ <a href="{% if object %}{% url 'document-update' pk=document.pk %}{% endif %}" class="nav-item nav-link {% block nav-doc-active %}{% endblock %}" id="nav-doc-tab" role="tab" aria-controls="nav-doc" aria-selected="true">{% trans "Description" %}</a> <a href="{% if object %}{% url 'document-images' pk=document.pk %}{% else %}#{% endif %}" class="nav-item nav-link {% if not object %}disabled{% endif %} {% block nav-images-active %}{% endblock %}" id="nav-img-tab" role="tab" aria-controls="nav-img" aria-selected="true">{% trans "Images" %}</a> <a href="{% if document %}{% url 'document-part-edit' pk=document.pk %}{% else %}#{% endif %}" class="nav-item nav-link active {% if not object or not 
document.parts.count %}disabled{% endif %}" id="nav-edit-tab" role="tab" aria-controls="nav-doc" aria-selected="true">{% trans "Edit" %}</a> - {% if perms.core.can_train %} {% with models_count=document.ocr_models.count %} <a href="{% if document and models_count %}{% url 'document-models' document_pk=document.pk %}{% else %}#{% endif %}" class="nav-item nav-link {% if not document or not models_count %}disabled{% endif %}" id="nav-models-tab" role="tab" aria-controls="nav-doc" aria-selected="true">{% trans "Models" %}</a> {% endwith %} - {% endif %} </editor> </div> {% endblock %} -- GitLab From d4438da88090849aeb8be35c8bc69983ff0ef6b9 Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Fri, 21 May 2021 09:36:19 +0200 Subject: [PATCH 03/12] Add a link to user models --- app/escriptorium/templates/base.html | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app/escriptorium/templates/base.html b/app/escriptorium/templates/base.html index e88a8886..dbd1cd38 100644 --- a/app/escriptorium/templates/base.html +++ b/app/escriptorium/templates/base.html @@ -65,6 +65,9 @@ <li class="nav-item"> <a class="nav-link {% block nav-proj-list-class %}{% endblock %}" href="{% url 'projects-list' %}">{% trans "My Projects" %}</a> </li> + <li class="nav-item"> + <a class="nav-link" href="{% url 'user-models' %}">{% trans "My models" %}</a> + </li> <li class="nav-item dropdown"> <a class="nav-link dropdown-toggle" href="#" id="navbarDropdown" role="button" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false"> {% blocktrans with username=user.username %}Hello {{username}}{% endblocktrans %} -- GitLab From 5a47ec96479ea97483370f760c1054171eb6bcb8 Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Fri, 21 May 2021 11:10:06 +0200 Subject: [PATCH 04/12] Split user models and document models views --- app/apps/core/urls.py | 7 ++-- app/apps/core/views.py | 32 +++++++++++-------- .../core/models_list/document_models.html | 9 ++++++ 
.../templates/core/models_list/main.html | 9 ++++++ .../templates/core/models_list/scripts.html | 16 ++++++++++ .../table.html} | 22 ------------- 6 files changed, 56 insertions(+), 39 deletions(-) create mode 100644 app/escriptorium/templates/core/models_list/document_models.html create mode 100644 app/escriptorium/templates/core/models_list/main.html create mode 100644 app/escriptorium/templates/core/models_list/scripts.html rename app/escriptorium/templates/core/{models_list.html => models_list/table.html} (87%) diff --git a/app/apps/core/urls.py b/app/apps/core/urls.py index 2420d625..434d653f 100644 --- a/app/apps/core/urls.py +++ b/app/apps/core/urls.py @@ -10,7 +10,8 @@ from core.views import (Home, UpdateDocument, EditPart, DocumentImages, - ModelsList, + UserModels, + DocumentModels, ModelDelete, ModelCancelTraining, PublishDocument, @@ -33,11 +34,11 @@ urlpatterns = [ path('document/<int:pk>/part/<int:part_pk>/edit/', EditPart.as_view(), name='document-part-edit'), path('document/<int:pk>/images/', DocumentImages.as_view(), name='document-images'), - path('models/', ModelsList.as_view(), name='user-models'), + path('models/', UserModels.as_view(), name='user-models'), path('model/<int:pk>/delete/', ModelDelete.as_view(), name='model-delete'), path('model/<int:pk>/cancel_training/', ModelCancelTraining.as_view(), name='model-cancel-training'), - path('document/<int:document_pk>/models/', ModelsList.as_view(), name='document-models'), + path('document/<int:document_pk>/models/', DocumentModels.as_view(), name='document-models'), path('document/<int:pk>/publish/', PublishDocument.as_view(), name='document-publish'), path('document/<int:pk>/process/', DocumentPartsProcessAjax.as_view(), name='document-parts-process'), diff --git a/app/apps/core/views.py b/app/apps/core/views.py index d8cfd458..0e2b9f28 100644 --- a/app/apps/core/views.py +++ b/app/apps/core/views.py @@ -296,30 +296,34 @@ class EditPart(LoginRequiredMixin, DetailView): return 
super().dispatch(*args, **kwargs) -class ModelsList(LoginRequiredMixin, ListView): +class DocumentModels(LoginRequiredMixin, ListView): model = OcrModel - template_name = "core/models_list.html" + template_name = "core/models_list/document_models.html" http_method_names = ('get',) def get_queryset(self): - if 'document_pk' in self.kwargs: - try: - self.document = Document.objects.for_user(self.request.user).get(pk=self.kwargs.get('document_pk')) - except Document.DoesNotExist: - raise PermissionDenied - return self.document.ocr_models.all() - else: - self.document = None - return OcrModel.objects.filter(owner=self.request.user) + try: + self.document = Document.objects.for_user(self.request.user).get(pk=self.kwargs.get('document_pk')) + except Document.DoesNotExist: + raise PermissionDenied + return self.document.ocr_models.all() def get_context_data(self, **kwargs): context = super().get_context_data(**kwargs) - if self.document: - context['document'] = self.document - context['object'] = self.document # legacy + context['document'] = self.document + context['object'] = self.document # legacy return context +class UserModels(LoginRequiredMixin, ListView): + model = OcrModel + template_name = "core/models_list/main.html" + http_method_names = ('get',) + + def get_queryset(self): + return OcrModel.objects.filter(owner=self.request.user) + + class ModelDelete(LoginRequiredMixin, SuccessMessageMixin, DeleteView): model = OcrModel success_message = _("Model deleted successfully!") diff --git a/app/escriptorium/templates/core/models_list/document_models.html b/app/escriptorium/templates/core/models_list/document_models.html new file mode 100644 index 00000000..07adfa0f --- /dev/null +++ b/app/escriptorium/templates/core/models_list/document_models.html @@ -0,0 +1,9 @@ +{% extends 'core/document_nav.html' %} +{% load i18n staticfiles %} + +{% block tab_content %} + {% include 'core/models_list/table.html' %} +{% endblock %} +{% block scripts %} + {% include 
'core/models_list/scripts.html' %} +{% endblock %} diff --git a/app/escriptorium/templates/core/models_list/main.html b/app/escriptorium/templates/core/models_list/main.html new file mode 100644 index 00000000..27b6bfcf --- /dev/null +++ b/app/escriptorium/templates/core/models_list/main.html @@ -0,0 +1,9 @@ +{% extends 'base.html' %} +{% load i18n staticfiles %} + +{% block body %} + {% include 'core/models_list/table.html' %} +{% endblock %} +{% block scripts %} + {% include 'core/models_list/scripts.html' %} +{% endblock %} diff --git a/app/escriptorium/templates/core/models_list/scripts.html b/app/escriptorium/templates/core/models_list/scripts.html new file mode 100644 index 00000000..766e258e --- /dev/null +++ b/app/escriptorium/templates/core/models_list/scripts.html @@ -0,0 +1,16 @@ +<script type="text/javascript"> + 'use strict'; + {% if user.onboarding %} + const ONBOARDING_PAGE = "onboarding_models"; + {% endif %} +</script> + +{{ block.super }} +<script type="text/javascript"> +'use strict'; +$(document).ready(function() { + // join the ws room + joinDocumentRoom('{{document.pk}}'); + bootModels(); +}); +</script> diff --git a/app/escriptorium/templates/core/models_list.html b/app/escriptorium/templates/core/models_list/table.html similarity index 87% rename from app/escriptorium/templates/core/models_list.html rename to app/escriptorium/templates/core/models_list/table.html index a01584c1..0c356f74 100644 --- a/app/escriptorium/templates/core/models_list.html +++ b/app/escriptorium/templates/core/models_list/table.html @@ -1,7 +1,5 @@ -{% extends 'core/document_nav.html' %} {% load i18n staticfiles %} -{% block tab_content %} <table id="models-table" class="table table-hover"> <tr> <th class="w-50"></th> @@ -69,23 +67,3 @@ {% endfor %} {% endfor %} </table> -{% endblock %} - -{% block scripts %} -<script type="text/javascript"> - 'use strict'; - {% if user.onboarding %} - const ONBOARDING_PAGE = "onboarding_models"; - {% endif %} -</script> - -{{ 
block.super }} -<script type="text/javascript"> -'use strict'; -$(document).ready(function() { - // join the ws room - joinDocumentRoom('{{document.pk}}'); - bootModels(); -}); -</script> -{% endblock %} -- GitLab From 05c4d2e7f5d144c204bafe8b4298259dec4dc3b5 Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Fri, 21 May 2021 12:36:14 +0200 Subject: [PATCH 05/12] Model upload form --- app/apps/core/forms.py | 16 ++++++++++++++++ app/apps/core/urls.py | 4 +++- app/apps/core/views.py | 15 ++++++++++++++- .../templates/core/models_list/main.html | 1 + .../templates/core/ocrmodel_form.html | 10 ++++++++++ 5 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 app/escriptorium/templates/core/ocrmodel_form.html diff --git a/app/apps/core/forms.py b/app/apps/core/forms.py index fe229716..6a404a8b 100644 --- a/app/apps/core/forms.py +++ b/app/apps/core/forms.py @@ -122,6 +122,22 @@ MetadataFormSet = inlineformset_factory(Document, DocumentMetadata, extra=1, can_delete=True) +class ModelUploadForm(BootstrapFormMixin, forms.ModelForm): + name = forms.CharField() + file = forms.FileField( + validators=[FileExtensionValidator( + allowed_extensions=['mlmodel', 'pronn', 'clstm'])] + ) + job = forms.ChoiceField(choices=( + (OcrModel.MODEL_JOB_SEGMENT, _("Segment")), + (OcrModel.MODEL_JOB_RECOGNIZE, _("Recognize")), + )) + + class Meta: + model = OcrModel + fields = ('name', 'file', 'job') + + class DocumentProcessForm1(BootstrapFormMixin, forms.Form): parts = forms.CharField() diff --git a/app/apps/core/urls.py b/app/apps/core/urls.py index 434d653f..1480cc68 100644 --- a/app/apps/core/urls.py +++ b/app/apps/core/urls.py @@ -16,7 +16,8 @@ from core.views import (Home, ModelCancelTraining, PublishDocument, ShareProject, - DocumentPartsProcessAjax) + DocumentPartsProcessAjax, + ModelUpload) urlpatterns = [ path('', Home.as_view(), name='home'), @@ -35,6 +36,7 @@ urlpatterns = [ name='document-part-edit'), path('document/<int:pk>/images/', 
DocumentImages.as_view(), name='document-images'), path('models/', UserModels.as_view(), name='user-models'), + path('models/new/', ModelUpload.as_view(), name='model-upload'), path('model/<int:pk>/delete/', ModelDelete.as_view(), name='model-delete'), path('model/<int:pk>/cancel_training/', ModelCancelTraining.as_view(), name='model-cancel-training'), diff --git a/app/apps/core/views.py b/app/apps/core/views.py index 0e2b9f28..abc2c00f 100644 --- a/app/apps/core/views.py +++ b/app/apps/core/views.py @@ -16,7 +16,7 @@ from django.views.generic import CreateView, UpdateView, DeleteView from core.models import (Project, Document, DocumentPart, Metadata, OcrModel, AlreadyProcessingException) from core.forms import (ProjectForm, DocumentForm, MetadataFormSet, ProjectShareForm, - UploadImageForm, DocumentProcessForm) + UploadImageForm, DocumentProcessForm, ModelUploadForm) from imports.forms import ImportForm, ExportForm @@ -324,6 +324,19 @@ class UserModels(LoginRequiredMixin, ListView): return OcrModel.objects.filter(owner=self.request.user) +class ModelUpload(LoginRequiredMixin, SuccessMessageMixin, CreateView): + model = OcrModel + form_class = ModelUploadForm + success_message = _("Model uploaded successfully!") + + def get_success_url(self): + return reverse('user-models') + + def form_valid(self, form): + form.instance.owner = self.request.user + return super().form_valid(form) + + class ModelDelete(LoginRequiredMixin, SuccessMessageMixin, DeleteView): model = OcrModel success_message = _("Model deleted successfully!") diff --git a/app/escriptorium/templates/core/models_list/main.html b/app/escriptorium/templates/core/models_list/main.html index 27b6bfcf..e554bbef 100644 --- a/app/escriptorium/templates/core/models_list/main.html +++ b/app/escriptorium/templates/core/models_list/main.html @@ -3,6 +3,7 @@ {% block body %} {% include 'core/models_list/table.html' %} + <a href="{% url 'model-upload' %}" class="btn btn-primary">{% trans "Upload a model" %}</a> {% 
endblock %} {% block scripts %} {% include 'core/models_list/scripts.html' %} diff --git a/app/escriptorium/templates/core/ocrmodel_form.html b/app/escriptorium/templates/core/ocrmodel_form.html new file mode 100644 index 00000000..7c21c80f --- /dev/null +++ b/app/escriptorium/templates/core/ocrmodel_form.html @@ -0,0 +1,10 @@ +{% extends 'base.html' %} +{% load i18n staticfiles bootstrap %} + +{% block body %} +<h2>{% trans "Upload a model" %}</h2> +<form method="post" class="inline-form" enctype="multipart/form-data" action="{% url 'model-upload' %}">{% csrf_token %} + {{ form.as_p }} + <button type="submit" value="" class="nav-item btn btn-success">{% trans 'Upload' %}</button> +</form> +{% endblock %} -- GitLab From 4e1a5ce3faa996f7638441f4c10863342ecc9c28 Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Fri, 21 May 2021 16:05:05 +0200 Subject: [PATCH 06/12] Clean multiple upload fields --- app/apps/core/forms.py | 108 ++---------------- .../templates/core/wizards/segment.html | 7 +- .../templates/core/wizards/segtrain.html | 4 - .../templates/core/wizards/train.html | 6 - .../templates/core/wizards/transcribe.html | 7 +- 5 files changed, 12 insertions(+), 120 deletions(-) diff --git a/app/apps/core/forms.py b/app/apps/core/forms.py index 6a404a8b..9b91fece 100644 --- a/app/apps/core/forms.py +++ b/app/apps/core/forms.py @@ -192,29 +192,12 @@ class DocumentSegmentForm(DocumentProcessForm1): ('vertical-rl', _("Vertical r2l"))) text_direction = forms.ChoiceField(initial='horizontal-lr', required=False, choices=TEXT_DIRECTION_CHOICES) - upload_model = forms.FileField(required=False, - validators=[FileExtensionValidator( - allowed_extensions=['mlmodel', 'pronn', 'clstm'])]) def clean(self): data = super().clean() model_job = OcrModel.MODEL_JOB_SEGMENT - if data.get('upload_model'): - model = OcrModel.objects.create( - owner=self.user, - name=data['upload_model'].name.rsplit('.', 1)[0], - job=model_job) - OcrModelDocument.objects.create( - 
document=self.parts[0].document, - ocr_model=model, - executed_on=timezone.now(), - ) - # Note: needs to save the file in a second step because the path needs the db PK - model.file = data['upload_model'] - model.save() - - elif data.get('seg_model'): + if data.get('seg_model'): model = data.get('seg_model') ocr_model_document, created = OcrModelDocument.objects.get_or_create( ocr_model=model, @@ -247,9 +230,6 @@ class DocumentTrainForm(DocumentProcessForm1): train_model = forms.ModelChoiceField(queryset=OcrModel.objects .filter(job=OcrModel.MODEL_JOB_RECOGNIZE), label=_("Model"), required=False) - upload_model = forms.FileField(required=False, - validators=[FileExtensionValidator( - allowed_extensions=['mlmodel', 'pronn', 'clstm'])]) transcription = forms.ModelChoiceField(queryset=Transcription.objects.all(), required=False) @@ -275,20 +255,6 @@ class DocumentTrainForm(DocumentProcessForm1): ocr_model_document.trained_on = timezone.now() ocr_model_document.save() - elif data.get('upload_model'): - model = OcrModel.objects.create( - owner=self.user, - name=data['upload_model'].name.rsplit('.', 1)[0], - job=model_job) - OcrModelDocument.objects.create( - document=self.parts[0].document, - ocr_model=model, - trained_on=timezone.now(), - ) - # Note: needs to save the file in a second step because the path needs the db PK - model.file = data['upload_model'] - model.save() - elif data.get('new_model'): # file will be created by the training process model = OcrModel.objects.create( @@ -321,10 +287,6 @@ class DocumentSegtrainForm(DocumentProcessForm1): segtrain_model = forms.ModelChoiceField(queryset=OcrModel.objects .filter(job=OcrModel.MODEL_JOB_SEGMENT), label=_("Model"), required=False) - upload_model = forms.FileField(required=False, - validators=[FileExtensionValidator( - allowed_extensions=['mlmodel', 'pronn', 'clstm'])]) - new_model = forms.CharField(required=False, label=_('Model name')) def clean(self): @@ -345,19 +307,6 @@ class 
DocumentSegtrainForm(DocumentProcessForm1): if not created: ocr_model_document.trained_on = timezone.now() ocr_model_document.save() - elif data.get('upload_model'): - model = OcrModel.objects.create( - owner=self.user, - name=data['upload_model'].name.rsplit('.', 1)[0], - job=model_job) - OcrModelDocument.objects.create( - document=self.parts[0].document, - ocr_model=model, - trained_on=timezone.now(), - ) - # Note: needs to save the file in a second step because the path needs the db PK - model.file = data['upload_model'] - model.save() elif data.get('new_model'): # file will be created by the training process @@ -388,9 +337,6 @@ class DocumentSegtrainForm(DocumentProcessForm1): class DocumentTranscribeForm(DocumentProcessForm1): - upload_model = forms.FileField(required=False, - validators=[FileExtensionValidator( - allowed_extensions=['mlmodel', 'pronn', 'clstm'])]) ocr_model = forms.ModelChoiceField(queryset=OcrModel.objects .filter(job=OcrModel.MODEL_JOB_RECOGNIZE), label=_("Model"), required=False) @@ -400,33 +346,15 @@ class DocumentTranscribeForm(DocumentProcessForm1): model_job = OcrModel.MODEL_JOB_RECOGNIZE - if data.get('upload_model'): - model = OcrModel.objects.create( - owner=self.user, - name=data['upload_model'].name.rsplit('.', 1)[0], - job=model_job) - OcrModelDocument.objects.create( - document=self.parts[0].document, - ocr_model=model, - executed_on=timezone.now(), - ) - # Note: needs to save the file in a second step because the path needs the db PK - model.file = data['upload_model'] - model.save() - - elif data.get('ocr_model'): - model = data.get('ocr_model') - ocr_model_document, created = OcrModelDocument.objects.get_or_create( - ocr_model=model, - document=self.parts[0].document, - defaults={'executed_on': timezone.now()} - ) - if not created: - ocr_model_document.executed_on = timezone.now() - ocr_model_document.save() - else: - raise forms.ValidationError( - _("Either select a name for your new model or an existing one.")) + model = 
data['ocr_model'] + ocr_model_document, created = OcrModelDocument.objects.get_or_create( + ocr_model=model, + document=self.parts[0].document, + defaults={'executed_on': timezone.now()} + ) + if not created: + ocr_model_document.executed_on = timezone.now() + ocr_model_document.save() data['model'] = model return data @@ -490,9 +418,6 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): text_direction = forms.ChoiceField(initial='horizontal-lr', required=False, choices=TEXT_DIRECTION_CHOICES) # transcribe - upload_model = forms.FileField(required=False, - validators=[FileExtensionValidator( - allowed_extensions=['mlmodel', 'pronn', 'clstm'])]) ocr_model = forms.ModelChoiceField(queryset=OcrModel.objects .filter(job=OcrModel.MODEL_JOB_RECOGNIZE), label=_("Model"), required=False) @@ -591,19 +516,6 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): if not created: ocr_model_document.trained_on = timezone.now() ocr_model_document.save() - elif data.get('upload_model'): - model = OcrModel.objects.create( - owner=self.user, - name=data['upload_model'].name.rsplit('.', 1)[0], - job=model_job) - OcrModelDocument.objects.create( - document=self.parts[0].document, - ocr_model=model, - executed_on=timezone.now(), - ) - # Note: needs to save the file in a second step because the path needs the db PK - model.file = data['upload_model'] - model.save() elif data.get('new_model'): # file will be created by the training process diff --git a/app/escriptorium/templates/core/wizards/segment.html b/app/escriptorium/templates/core/wizards/segment.html index 52adbc7a..a810c574 100644 --- a/app/escriptorium/templates/core/wizards/segment.html +++ b/app/escriptorium/templates/core/wizards/segment.html @@ -8,12 +8,7 @@ {% block wizard_fields %} <div class="form-group"> - <h5>{% trans "Upload a model" %}</h5> - {% render_field process_form.upload_model class="js-proc-settings" accept=".mlmodel" %} -</div> - -<div class="form-group"> - <h5>{% trans "Or select an existing 
one" %}</h5> + <h5>{% trans "Select a model" %}</h5> {% render_field process_form.seg_model class="js-proc-settings" %} </div> diff --git a/app/escriptorium/templates/core/wizards/segtrain.html b/app/escriptorium/templates/core/wizards/segtrain.html index 6b279d14..40cb84a6 100644 --- a/app/escriptorium/templates/core/wizards/segtrain.html +++ b/app/escriptorium/templates/core/wizards/segtrain.html @@ -10,10 +10,6 @@ <h5>{% trans "New model" %}</h5> {% render_field process_form.new_model class="js-proc-settings" %} </div> -<div class="form-group"> - <h5>{% trans "Or Upload a model" %}</h5> - {% render_field process_form.upload_model class="js-proc-settings" accept=".mlmodel,.clstm,.pronn" %} -</div> <div class="form-group"> <h5>{% trans "Or select an existing one" %}</h5> {% render_field process_form.segtrain_model class="js-proc-settings" %} diff --git a/app/escriptorium/templates/core/wizards/train.html b/app/escriptorium/templates/core/wizards/train.html index 458de59b..f96267bf 100644 --- a/app/escriptorium/templates/core/wizards/train.html +++ b/app/escriptorium/templates/core/wizards/train.html @@ -14,12 +14,6 @@ {% render_field process_form.new_model class="js-proc-settings" %} </div> -<div class="form-group"> - <h5>{% trans "Or upload a model" %}</h5> - {% render_field process_form.upload_model class="js-proc-settings" accept=".mlmodel,.clstm,.pronn" %} - -</div> - <div class="form-group"> <h5>{% trans "Or select an existing one" %}</h5> {% render_field process_form.train_model class="js-proc-settings" %} diff --git a/app/escriptorium/templates/core/wizards/transcribe.html b/app/escriptorium/templates/core/wizards/transcribe.html index 8226fd83..901b3186 100644 --- a/app/escriptorium/templates/core/wizards/transcribe.html +++ b/app/escriptorium/templates/core/wizards/transcribe.html @@ -7,12 +7,7 @@ {% block wizard_fields %} <div class="form-group"> - <h5>{% trans "Upload model" %}</h5> - {% render_field process_form.upload_model class="js-proc-settings" 
data_document=document.pk accept=".mlmodel,.clstm,.pronn" %} -</div> - -<div class="form-group"> - <h5>{% trans "Or use existing model" %}</h5> + <h5>{% trans "Select a model" %}</h5> {% render_field process_form.ocr_model class="js-proc-settings" data_document=document.pk %} </div> {% endblock %} -- GitLab From fafa56d929c418b34807c0011a1f6faa7d95b0d6 Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Fri, 21 May 2021 16:27:16 +0200 Subject: [PATCH 07/12] Limit qs to models owned by the user or linked to the document --- app/apps/core/forms.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/app/apps/core/forms.py b/app/apps/core/forms.py index 9b91fece..27b15ab2 100644 --- a/app/apps/core/forms.py +++ b/app/apps/core/forms.py @@ -447,12 +447,18 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): if self.document.read_direction == self.document.READ_DIRECTION_RTL: self.initial['text_direction'] = 'horizontal-rl' self.fields['binarizer'].widget.attrs['disabled'] = True - self.fields['train_model'].queryset &= self.document.ocr_models.all() - self.fields['segtrain_model'].queryset &= self.document.ocr_models.all() - self.fields['seg_model'].queryset &= self.document.ocr_models.all() - self.fields['ocr_model'].queryset &= OcrModel.objects.filter( + # Limit qs to models owned by the user or already linked to this document + for field in ['train_model', 'segtrain_model', 'seg_model']: + self.fields[field].queryset = self.fields[field].queryset.filter( + Q(owner=self.user) + | Q(documents=self.document) + ) + self.fields['ocr_model'].queryset = self.fields['ocr_model'].queryset.filter( + # Include non owned RECOGNIZE models which have the same script that the main document Q(documents=None, script=self.document.main_script) - | Q(documents=self.document)) + | Q(owner=self.user) + | Q(documents=self.document) + ) self.fields['transcription'].queryset = Transcription.objects.filter(document=self.document) 
@cached_property -- GitLab From 98f3ac6b303318a9d7e8b0fbfd85519c29b7b657 Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Tue, 25 May 2021 17:41:26 +0200 Subject: [PATCH 08/12] Suggestions --- app/apps/core/forms.py | 45 +++++++++++++------ app/apps/core/views.py | 2 + .../templates/core/models_list/main.html | 5 ++- .../templates/core/models_list/table.html | 3 +- .../templates/core/ocrmodel_form.html | 11 ++++- 5 files changed, 47 insertions(+), 19 deletions(-) diff --git a/app/apps/core/forms.py b/app/apps/core/forms.py index 27b15ab2..8ce7c2b6 100644 --- a/app/apps/core/forms.py +++ b/app/apps/core/forms.py @@ -5,6 +5,7 @@ from PIL import Image from django import forms from django.conf import settings from django.core.validators import FileExtensionValidator, MinValueValidator, MaxValueValidator +from django.core.files.uploadedfile import TemporaryUploadedFile from django.db.models import Q from django.forms.models import inlineformset_factory from django.utils import timezone @@ -17,6 +18,7 @@ from core.models import (Project, Document, Metadata, DocumentMetadata, BlockType, LineType, AlreadyProcessingException) from users.models import User +from kraken.lib import vgsl logger = logging.getLogger(__name__) @@ -126,16 +128,35 @@ class ModelUploadForm(BootstrapFormMixin, forms.ModelForm): name = forms.CharField() file = forms.FileField( validators=[FileExtensionValidator( - allowed_extensions=['mlmodel', 'pronn', 'clstm'])] + allowed_extensions=['mlmodel'])] ) - job = forms.ChoiceField(choices=( - (OcrModel.MODEL_JOB_SEGMENT, _("Segment")), - (OcrModel.MODEL_JOB_RECOGNIZE, _("Recognize")), - )) + + def clean(self): + data = super().clean() + model = data.get('file') + if not model: + return + # Early validation of the model loading and detection of its job + try: + assert isinstance(model, TemporaryUploadedFile) + loaded_model = vgsl.TorchVGSLModel.load_model(model.file.name) + hyper_params = loaded_model.user_metadata.get('hyper_params', 
{}) + except Exception as e: + raise forms.ValidationError(_(f"The provided model could not be loaded: {e}")) + + # TODO handle hyper_params job detection + job = str(hyper_params.get('job', '')).lower() + if job == 'segment': + self.instance.job = OcrModel.MODEL_JOB_SEGMENT + elif job == 'recognize': + self.instance.job = OcrModel.MODEL_JOB_RECOGNIZE + else: + raise forms.ValidationError(_("No job type is defined in model's hyper parameters")) + return data class Meta: model = OcrModel - fields = ('name', 'file', 'job') + fields = ('name', 'file') class DocumentProcessForm1(BootstrapFormMixin, forms.Form): @@ -447,18 +468,14 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): if self.document.read_direction == self.document.READ_DIRECTION_RTL: self.initial['text_direction'] = 'horizontal-rl' self.fields['binarizer'].widget.attrs['disabled'] = True - # Limit qs to models owned by the user or already linked to this document - for field in ['train_model', 'segtrain_model', 'seg_model']: + + # Limit querysets to models owned by the user or already linked to this document + for field in ['train_model', 'segtrain_model', 'seg_model', 'ocr_model']: self.fields[field].queryset = self.fields[field].queryset.filter( Q(owner=self.user) | Q(documents=self.document) ) - self.fields['ocr_model'].queryset = self.fields['ocr_model'].queryset.filter( - # Include non owned RECOGNIZE models which have the same script that the main document - Q(documents=None, script=self.document.main_script) - | Q(owner=self.user) - | Q(documents=self.document) - ) + self.fields['transcription'].queryset = Transcription.objects.filter(document=self.document) @cached_property diff --git a/app/apps/core/views.py b/app/apps/core/views.py index abc2c00f..efbb3fb3 100644 --- a/app/apps/core/views.py +++ b/app/apps/core/views.py @@ -300,6 +300,7 @@ class DocumentModels(LoginRequiredMixin, ListView): model = OcrModel template_name = "core/models_list/document_models.html" http_method_names = 
('get',) + paginate_by = 20 def get_queryset(self): try: @@ -319,6 +320,7 @@ class UserModels(LoginRequiredMixin, ListView): model = OcrModel template_name = "core/models_list/main.html" http_method_names = ('get',) + paginate_by = 20 def get_queryset(self): return OcrModel.objects.filter(owner=self.request.user) diff --git a/app/escriptorium/templates/core/models_list/main.html b/app/escriptorium/templates/core/models_list/main.html index e554bbef..674eba42 100644 --- a/app/escriptorium/templates/core/models_list/main.html +++ b/app/escriptorium/templates/core/models_list/main.html @@ -2,8 +2,9 @@ {% load i18n staticfiles %} {% block body %} - {% include 'core/models_list/table.html' %} - <a href="{% url 'model-upload' %}" class="btn btn-primary">{% trans "Upload a model" %}</a> +<a href="{% url 'model-upload' %}" class="btn btn-success float-sm-right">{% trans "Upload a model" %}</a> +<h2>{% trans "My Models" %}</h2> +{% include 'core/models_list/table.html' %} {% endblock %} {% block scripts %} {% include 'core/models_list/scripts.html' %} diff --git a/app/escriptorium/templates/core/models_list/table.html b/app/escriptorium/templates/core/models_list/table.html index 0c356f74..d5731aa0 100644 --- a/app/escriptorium/templates/core/models_list/table.html +++ b/app/escriptorium/templates/core/models_list/table.html @@ -9,7 +9,7 @@ <th>{% trans "Errors" %}</th> <th>{# buttons #}</th> </tr> - {% for model in object_list %} + {% for model in page_obj %} <tr id="tr-{{model.pk}}" class="model-head" data-id="{{model.pk}}"> <td title="{% trans "Model name" %}">{{ model.name }}</td> <td title="{% trans "Model role" %}">{{ model.get_job_display }}</td> @@ -67,3 +67,4 @@ {% endfor %} {% endfor %} </table> +{% include 'includes/pagination.html' %} diff --git a/app/escriptorium/templates/core/ocrmodel_form.html b/app/escriptorium/templates/core/ocrmodel_form.html index 7c21c80f..6a2cc113 100644 --- a/app/escriptorium/templates/core/ocrmodel_form.html +++ 
b/app/escriptorium/templates/core/ocrmodel_form.html @@ -3,8 +3,15 @@ {% block body %} <h2>{% trans "Upload a model" %}</h2> -<form method="post" class="inline-form" enctype="multipart/form-data" action="{% url 'model-upload' %}">{% csrf_token %} - {{ form.as_p }} +<form method="post" class="inline-form" enctype="multipart/form-data" action="{% url 'model-upload' %}"> + {% csrf_token %} + {% render_field form.file %} + {% render_field form.name %} + {% if form.non_field_errors %} + {% for err in form.non_field_errors %} + <p class="error">{{ err }}</p> + {% endfor %} + {% endif %} <button type="submit" value="" class="nav-item btn btn-success">{% trans 'Upload' %}</button> </form> {% endblock %} -- GitLab From bc8660922568323fdcc66cd306ea331671c4fd7e Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Wed, 26 May 2021 10:27:27 +0200 Subject: [PATCH 09/12] Add a minimalist JS code to auto fill name --- app/escriptorium/templates/core/ocrmodel_form.html | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/app/escriptorium/templates/core/ocrmodel_form.html b/app/escriptorium/templates/core/ocrmodel_form.html index 6a2cc113..4882dc8f 100644 --- a/app/escriptorium/templates/core/ocrmodel_form.html +++ b/app/escriptorium/templates/core/ocrmodel_form.html @@ -15,3 +15,17 @@ <button type="submit" value="" class="nav-item btn btn-success">{% trans 'Upload' %}</button> </form> {% endblock %} +{% block scripts %} +<script type="text/javascript"> +/* Handle a basic name field auto filling */ +const fileInput = document.getElementById('id_file'); +const nameInput = document.getElementById('id_name'); +let currentFileName = ''; +fileInput.onchange = function() { + if (fileInput.files.length && (!nameInput.value || nameInput.value == currentFileName)) { + nameInput.value = fileInput.files[0].name; + currentFileName = nameInput.value; + } +}; +</script> +{% endblock %} -- GitLab From cd64931bff59aeb9dfebb97427daf9247e4dc6c1 Mon Sep 17 00:00:00 2001 
From: Valentin Rigal <rigal@teklia.com> Date: Wed, 26 May 2021 12:54:40 +0200 Subject: [PATCH 10/12] Update automatic job detection --- app/apps/core/forms.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/app/apps/core/forms.py b/app/apps/core/forms.py index 8ce7c2b6..16c78510 100644 --- a/app/apps/core/forms.py +++ b/app/apps/core/forms.py @@ -140,15 +140,16 @@ class ModelUploadForm(BootstrapFormMixin, forms.ModelForm): try: assert isinstance(model, TemporaryUploadedFile) loaded_model = vgsl.TorchVGSLModel.load_model(model.file.name) - hyper_params = loaded_model.user_metadata.get('hyper_params', {}) + job = loaded_model.model_type + # Fall back to seg_type attribute which cannot be set to 'bbox' for recognition jobs + if job not in ('recognition', 'segmentation') and loaded_model.seg_type == 'bbox': + job = 'segmentation' except Exception as e: raise forms.ValidationError(_(f"The provided model could not be loaded: {e}")) - # TODO handle hyper_params job detection - job = str(hyper_params.get('job', '')).lower() - if job == 'segment': + if job == 'segmentation': self.instance.job = OcrModel.MODEL_JOB_SEGMENT - elif job == 'recognize': + elif job == 'recognition': self.instance.job = OcrModel.MODEL_JOB_RECOGNIZE else: raise forms.ValidationError(_("No job type is defined in model's hyper parameters")) -- GitLab From b2871ac68fb6e8816e88f22c8bc7b90e4612f737 Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Wed, 26 May 2021 12:57:23 +0200 Subject: [PATCH 11/12] Typo --- app/escriptorium/templates/core/wizards/segment.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/escriptorium/templates/core/wizards/segment.html b/app/escriptorium/templates/core/wizards/segment.html index a810c574..311094d1 100644 --- a/app/escriptorium/templates/core/wizards/segment.html +++ b/app/escriptorium/templates/core/wizards/segment.html @@ -8,7 +8,7 @@ {% block wizard_fields %} <div class="form-group"> - <h5>{% 
trans "Select an model" %}</h5> + <h5>{% trans "Select a model" %}</h5> {% render_field process_form.seg_model class="js-proc-settings" %} </div> -- GitLab From dc5c662c5a46bde3558c16ec8ec7af1a8ab08b5f Mon Sep 17 00:00:00 2001 From: Valentin Rigal <rigal@teklia.com> Date: Wed, 26 May 2021 13:50:30 +0200 Subject: [PATCH 12/12] Suggestions --- app/apps/core/forms.py | 55 ++++++++++--------- .../templates/core/ocrmodel_form.html | 5 -- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/app/apps/core/forms.py b/app/apps/core/forms.py index 16c78510..4d9ea04b 100644 --- a/app/apps/core/forms.py +++ b/app/apps/core/forms.py @@ -19,6 +19,7 @@ from core.models import (Project, Document, Metadata, DocumentMetadata, BlockType, LineType, AlreadyProcessingException) from users.models import User from kraken.lib import vgsl +from kraken.lib.exceptions import KrakenInvalidModelException logger = logging.getLogger(__name__) @@ -131,29 +132,29 @@ class ModelUploadForm(BootstrapFormMixin, forms.ModelForm): allowed_extensions=['mlmodel'])] ) - def clean(self): - data = super().clean() - model = data.get('file') - if not model: - return - # Early validation of the model loading and detection of its job + def clean_file(self): + # Early validation of the model loading + file_field = self.cleaned_data['file'] try: - assert isinstance(model, TemporaryUploadedFile) - loaded_model = vgsl.TorchVGSLModel.load_model(model.file.name) - job = loaded_model.model_type - # Fall back to seg_type attribute which cannot be set to 'bbox' for recognition jobs - if job not in ('recognition', 'segmentation') and loaded_model.seg_type == 'bbox': - job = 'segmentation' - except Exception as e: - raise forms.ValidationError(_(f"The provided model could not be loaded: {e}")) - - if job == 'segmentation': + model = vgsl.TorchVGSLModel.load_model(file_field.file.name) + except KrakenInvalidModelException: + raise forms.ValidationError(_("The provided model could not be loaded.")) + 
self._model_job = model.model_type + if self._model_job not in ('segmentation', 'recognition'): + raise forms.ValidationError(_("Invalid model (Couldn't determine whether it's a segmentation or recognition model).")) + elif self._model_job == 'recognition' and model.seg_type == "bbox": + raise forms.ValidationError(_("eScriptorium is not compatible with bounding box models.")) + return file_field + + def clean(self): + if not getattr(self, '_model_job', None): + return super().clean() + # Update the job field on the instantiated model from the cleaned model field + if self._model_job == 'segmentation': self.instance.job = OcrModel.MODEL_JOB_SEGMENT - elif job == 'recognition': + elif self._model_job == 'recognition': self.instance.job = OcrModel.MODEL_JOB_RECOGNIZE - else: - raise forms.ValidationError(_("No job type is defined in model's hyper parameters")) - return data + return super().clean() class Meta: model = OcrModel @@ -180,12 +181,14 @@ class DocumentProcessForm1(BootstrapFormMixin, forms.Form): if self.document.read_direction == self.document.READ_DIRECTION_RTL: self.initial['text_direction'] = 'horizontal-rl' self.fields['binarizer'].widget.attrs['disabled'] = True - self.fields['train_model'].queryset &= self.document.ocr_models.all() - self.fields['segtrain_model'].queryset &= self.document.ocr_models.all() - self.fields['seg_model'].queryset &= self.document.ocr_models.all() - self.fields['ocr_model'].queryset &= OcrModel.objects.filter( - Q(documents=None, script=self.document.main_script) - | Q(documents=self.document)) + + # Limit querysets to models owned by the user or already linked to this document + for field in ['train_model', 'segtrain_model', 'seg_model', 'ocr_model']: + self.fields[field].queryset = self.fields[field].queryset.filter( + Q(owner=self.user) + | Q(documents=self.document) + ) + self.fields['transcription'].queryset = Transcription.objects.filter(document=self.document) def process(self): diff --git 
a/app/escriptorium/templates/core/ocrmodel_form.html b/app/escriptorium/templates/core/ocrmodel_form.html index 4882dc8f..57efaad1 100644 --- a/app/escriptorium/templates/core/ocrmodel_form.html +++ b/app/escriptorium/templates/core/ocrmodel_form.html @@ -7,11 +7,6 @@ {% csrf_token %} {% render_field form.file %} {% render_field form.name %} - {% if form.non_field_errors %} - {% for err in form.non_field_errors %} - <p class="error">{{ err }}</p> - {% endfor %} - {% endif %} <button type="submit" value="" class="nav-item btn btn-success">{% trans 'Upload' %}</button> </form> {% endblock %} -- GitLab