diff --git a/app/apps/api/serializers.py b/app/apps/api/serializers.py index 8076d780b3ad5baaf96cd3999fac2297f4f9d52a..3feb762dd85c532be2073fce3520346be257194a 100644 --- a/app/apps/api/serializers.py +++ b/app/apps/api/serializers.py @@ -4,6 +4,7 @@ import html from django.conf import settings from django.db.utils import IntegrityError +from django.utils import timezone from django.utils.translation import gettext_lazy as _ from rest_framework import serializers @@ -21,7 +22,8 @@ from core.models import (Project, BlockType, LineType, Script, - OcrModel) + OcrModel, + OcrModelDocument) from core.tasks import (segtrain, train, segment, transcribe) logger = logging.getLogger(__name__) @@ -312,9 +314,9 @@ class OcrModelSerializer(serializers.ModelSerializer): def create(self, data): document = Document.objects.get(pk=self.context["view"].kwargs["document_pk"]) - data['document'] = document data['owner'] = self.context["view"].request.user obj = super().create(data) + document.ocr_models.add(obj) return obj @@ -354,8 +356,7 @@ class SegmentSerializer(ProcessSerializerMixin, serializers.Serializer): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.fields['model'].queryset = OcrModel.objects.filter(job=OcrModel.MODEL_JOB_SEGMENT, - document=self.document) + self.fields['model'].queryset = self.document.ocr_models.filter(job=OcrModel.MODEL_JOB_SEGMENT) self.fields['parts'].queryset = DocumentPart.objects.filter(document=self.document) def process(self): @@ -381,8 +382,7 @@ class SegTrainSerializer(ProcessSerializerMixin, serializers.Serializer): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.fields['model'].queryset = OcrModel.objects.filter(job=OcrModel.MODEL_JOB_SEGMENT, - document=self.document) + self.fields['model'].queryset = self.document.ocr_models.filter(job=OcrModel.MODEL_JOB_SEGMENT) self.fields['parts'].queryset = DocumentPart.objects.filter(document=self.document) def validate_parts(self, data): @@ -402,14 +402,18 @@ class SegTrainSerializer(ProcessSerializerMixin, serializers.Serializer): if self.validated_data.get('model_name'): file_ = model and model.file or None model = OcrModel.objects.create( - document=self.document, owner=self.user, name=self.validated_data['model_name'], job=OcrModel.MODEL_JOB_RECOGNIZE, file=file_ ) + OcrModelDocument.objects.create( + document=self.document, + ocr_model=model, + executed_on=timezone.now(), + ) - segtrain.delay(model.pk if model else None, + segtrain.delay(model.pk if model else None, self.document.pk, [part.pk for part in self.validated_data.get('parts')], user_pk=self.user.pk) @@ -425,8 +429,7 @@ class TrainSerializer(ProcessSerializerMixin, serializers.Serializer): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.fields['transcription'].queryset = Transcription.objects.filter(document=self.document) - self.fields['model'].queryset = OcrModel.objects.filter(job=OcrModel.MODEL_JOB_RECOGNIZE, - document=self.document) + self.fields['model'].queryset = self.document.ocr_models.filter(job=OcrModel.MODEL_JOB_RECOGNIZE) self.fields['parts'].queryset = DocumentPart.objects.filter(document=self.document) def validate(self, data): @@ -441,11 +444,15 @@ class TrainSerializer(ProcessSerializerMixin, serializers.Serializer): if self.validated_data.get('model_name'): file_ = model and model.file or None model = OcrModel.objects.create( - document=self.document, owner=self.user, name=self.validated_data['model_name'], job=OcrModel.MODEL_JOB_RECOGNIZE, file=file_) + OcrModelDocument.objects.create( + document=self.document, + ocr_model=model, + executed_on=timezone.now(), + ) train.delay([part.pk for part in self.validated_data.get('parts')], self.validated_data['transcription'].pk, @@ -462,8 +469,7 @@ class TranscribeSerializer(ProcessSerializerMixin, serializers.Serializer): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # self.fields['transcription'].queryset = Transcription.objects.filter(document=self.document) - self.fields['model'].queryset = OcrModel.objects.filter(job=OcrModel.MODEL_JOB_RECOGNIZE, - document=self.document) + self.fields['model'].queryset = self.document.ocr_models.filter(job=OcrModel.MODEL_JOB_RECOGNIZE) self.fields['parts'].queryset = DocumentPart.objects.filter(document=self.document) def process(self): diff --git a/app/apps/api/tests.py b/app/apps/api/tests.py index fb1ee141f2dd08f64d9a3fc76ad0354f9974bcf0..6523e1f2070b693120f9d096678f9dc6699dfdaa 100644 --- a/app/apps/api/tests.py +++ b/app/apps/api/tests.py @@ -35,12 +35,12 @@ class OcrModelViewSetTestCase(CoreFactoryTestCase): super().setUp() self.part = self.factory.make_part() self.user = self.part.document.owner - self.model = self.factory.make_model(document=self.part.document) + self.model = self.factory.make_model(self.part.document) def test_list(self): self.client.force_login(self.user) uri = reverse('api:model-list', kwargs={'document_pk': self.part.document.pk}) - with self.assertNumQueries(7): + with self.assertNumQueries(8): resp = self.client.get(uri) self.assertEqual(resp.status_code, 200) @@ -49,14 +49,14 @@ class OcrModelViewSetTestCase(CoreFactoryTestCase): uri = reverse('api:model-detail', kwargs={'document_pk': self.part.document.pk, 'pk': self.model.pk}) - with self.assertNumQueries(6): + with self.assertNumQueries(7): resp = self.client.get(uri) self.assertEqual(resp.status_code, 200) def test_create(self): self.client.force_login(self.user) uri = reverse('api:model-list', kwargs={'document_pk': self.part.document.pk}) - with self.assertNumQueries(4): + with self.assertNumQueries(6): resp = self.client.post(uri, { 'name': 'test.mlmodel', 'file': self.factory.make_asset_file(name='test.mlmodel', @@ -100,7 +100,7 @@ class DocumentViewSetTestCase(CoreFactoryTestCase): def test_list(self): self.client.force_login(self.doc.owner) uri = reverse('api:document-list') - with self.assertNumQueries(8): + with self.assertNumQueries(10): resp = self.client.get(uri) self.assertEqual(resp.status_code, 200) @@ -108,7 +108,7 @@ class DocumentViewSetTestCase(CoreFactoryTestCase): self.client.force_login(self.doc.owner) uri = reverse('api:document-detail', kwargs={'pk': self.doc.pk}) - with self.assertNumQueries(7): + with self.assertNumQueries(8): resp = self.client.get(uri) self.assertEqual(resp.status_code, 200) @@ -123,7 +123,7 @@ class DocumentViewSetTestCase(CoreFactoryTestCase): def test_segtrain_less_two_parts(self): self.client.force_login(self.doc.owner) - model = self.factory.make_model(job=OcrModel.MODEL_JOB_SEGMENT, document=self.doc) + model = self.factory.make_model(self.doc, job=OcrModel.MODEL_JOB_SEGMENT) uri = reverse('api:document-segtrain', kwargs={'pk': self.doc.pk}) resp = self.client.post(uri, data={ 'parts': [self.part.pk], @@ -147,7 +147,7 @@ class DocumentViewSetTestCase(CoreFactoryTestCase): def test_segtrain_existing_model_rename(self): self.client.force_login(self.doc.owner) - model = self.factory.make_model(job=OcrModel.MODEL_JOB_SEGMENT, document=self.doc) + model = self.factory.make_model(self.doc, job=OcrModel.MODEL_JOB_SEGMENT) uri = reverse('api:document-segtrain', kwargs={'pk': self.doc.pk}) resp = self.client.post(uri, data={ 'parts': [self.part.pk, self.part2.pk], @@ -160,7 +160,7 @@ class DocumentViewSetTestCase(CoreFactoryTestCase): def test_segment(self): uri = reverse('api:document-segment', kwargs={'pk': self.doc.pk}) self.client.force_login(self.doc.owner) - model = self.factory.make_model(job=OcrModel.MODEL_JOB_SEGMENT, document=self.doc) + model = self.factory.make_model(self.doc, job=OcrModel.MODEL_JOB_SEGMENT) resp = self.client.post(uri, data={ 'parts': [self.part.pk, self.part2.pk], 'seg_steps': 'both', @@ -177,15 +177,13 @@ class DocumentViewSetTestCase(CoreFactoryTestCase): 'transcription': self.transcription.pk }) self.assertEqual(resp.status_code, 200) - self.assertEqual(OcrModel.objects.filter( - document=self.doc, - job=OcrModel.MODEL_JOB_RECOGNIZE).count(), 1) + self.assertEqual(self.doc.ocr_models.filter(job=OcrModel.MODEL_JOB_RECOGNIZE).count(), 1) def test_transcribe(self): trans = Transcription.objects.create(document=self.part.document) self.client.force_login(self.doc.owner) - model = self.factory.make_model(job=OcrModel.MODEL_JOB_RECOGNIZE, document=self.doc) + model = self.factory.make_model(self.doc, job=OcrModel.MODEL_JOB_RECOGNIZE) uri = reverse('api:document-transcribe', kwargs={'pk': self.doc.pk}) resp = self.client.post(uri, data={ 'parts': [self.part.pk, self.part2.pk], diff --git a/app/apps/api/views.py b/app/apps/api/views.py index 1870d0e1e24c10c9e5d887dbff2b665bb95b1376..d4429bc3cae4829400d83a71166ee7e8e042047d 100644 --- a/app/apps/api/views.py +++ b/app/apps/api/views.py @@ -482,7 +482,7 @@ class OcrModelViewSet(DocumentPermissionMixin, ModelViewSet): def get_queryset(self): return (super().get_queryset() - .filter(document=self.kwargs['document_pk'])) + .filter(documents=self.kwargs['document_pk'])) @action(detail=True, methods=['post']) def cancel_training(self, request, pk=None): diff --git a/app/apps/core/admin.py b/app/apps/core/admin.py index a93a63c6bb868a5bc7d2170e99d34dbaaa9cb6b0..97c0787e630228b7fedc96e27e5ffa24b6044775 100644 --- a/app/apps/core/admin.py +++ b/app/apps/core/admin.py @@ -7,6 +7,7 @@ from core.models import (Project, DocumentMetadata, LineTranscription, OcrModel, + OcrModelDocument, Script, DocumentType, DocumentPartType, @@ -18,9 +19,13 @@ class MetadataInline(admin.TabularInline): model = DocumentMetadata +class OcrModelDocumentInline(admin.TabularInline): + model = OcrModelDocument + + class DocumentAdmin(admin.ModelAdmin): list_display = ['pk', 'name', 'owner', 'project'] - inlines = (MetadataInline,) + inlines = (MetadataInline, OcrModelDocumentInline) class DocumentPartAdmin(admin.ModelAdmin): @@ -43,6 +48,11 @@ class ScriptAdmin(admin.ModelAdmin): class OcrModelAdmin(admin.ModelAdmin): list_display = ['name', 'job', 'owner', 'script', 'training'] + inlines = (OcrModelDocumentInline,) + + +class OcrModelDocumentAdmin(admin.ModelAdmin): + list_display = ['document', 'ocr_model', 'trained_on', 'executed_on', 'created_at'] admin.site.register(Project) @@ -56,3 +66,4 @@ admin.site.register(LineType) admin.site.register(Script, ScriptAdmin) admin.site.register(Metadata) admin.site.register(OcrModel, OcrModelAdmin) +admin.site.register(OcrModelDocument, OcrModelDocumentAdmin) diff --git a/app/apps/core/forms.py b/app/apps/core/forms.py index d8e14cd7cb224ec8d5a926ee4ed4d740a3b600ec..26a65f8c6cfb9ff3773c77ead6ceb5aec02c6952 100644 --- a/app/apps/core/forms.py +++ b/app/apps/core/forms.py @@ -7,12 +7,14 @@ from django.conf import settings from django.core.validators import FileExtensionValidator, MinValueValidator, MaxValueValidator from django.db.models import Q from django.forms.models import inlineformset_factory +from django.utils import timezone from django.utils.functional import cached_property from django.utils.translation import gettext_lazy as _ from bootstrap.forms import BootstrapFormMixin from core.models import (Project, Document, Metadata, DocumentMetadata, - DocumentPart, OcrModel, Transcription, + DocumentPart, OcrModel, OcrModelDocument, Transcription, + BlockType, LineType, AlreadyProcessingException) from users.models import User @@ -139,12 +141,12 @@ class DocumentProcessForm1(BootstrapFormMixin, forms.Form): if self.document.read_direction == self.document.READ_DIRECTION_RTL: self.initial['text_direction'] = 'horizontal-rl' self.fields['binarizer'].widget.attrs['disabled'] = True - self.fields['train_model'].queryset &= OcrModel.objects.filter(document=self.document) - self.fields['segtrain_model'].queryset &= OcrModel.objects.filter(document=self.document) - self.fields['seg_model'].queryset &= OcrModel.objects.filter(document=self.document) + self.fields['train_model'].queryset &= self.document.ocr_models.all() + self.fields['segtrain_model'].queryset &= self.document.ocr_models.all() + self.fields['seg_model'].queryset &= self.document.ocr_models.all() self.fields['ocr_model'].queryset &= OcrModel.objects.filter( - Q(document=None, script=document.main_script) - | Q(document=self.document)) + Q(documents=None, script=self.document.main_script) + | Q(documents=self.document)) self.fields['transcription'].queryset = Transcription.objects.filter(document=self.document) def process(self): @@ -183,16 +185,28 @@ class DocumentSegmentForm(DocumentProcessForm1): if data.get('upload_model'): model = OcrModel.objects.create( - document=self.parts[0].document, owner=self.user, name=data['upload_model'].name.rsplit('.', 1)[0], job=model_job) + OcrModelDocument.objects.create( + document=self.parts[0].document, + ocr_model=model, + executed_on=timezone.now(), + ) # Note: needs to save the file in a second step because the path needs the db PK model.file = data['upload_model'] model.save() elif data.get('seg_model'): model = data.get('seg_model') + ocr_model_document, created = OcrModelDocument.objects.get_or_create( + ocr_model=model, + document=self.parts[0].document, + defaults={'executed_on': timezone.now()} + ) + if not created: + ocr_model_document.executed_on = timezone.now() + ocr_model_document.save() else: model = None @@ -235,13 +249,25 @@ class DocumentTrainForm(DocumentProcessForm1): if data.get('train_model'): model = data.get('train_model') + ocr_model_document, created = OcrModelDocument.objects.get_or_create( + ocr_model=model, + document=self.parts[0].document, + defaults={'trained_on': timezone.now()} + ) + if not created: + ocr_model_document.trained_on = timezone.now() + ocr_model_document.save() elif data.get('upload_model'): model = OcrModel.objects.create( - document=self.parts[0].document, owner=self.user, name=data['upload_model'].name.rsplit('.', 1)[0], job=model_job) + OcrModelDocument.objects.create( + document=self.parts[0].document, + ocr_model=model, + trained_on=timezone.now(), + ) # Note: needs to save the file in a second step because the path needs the db PK model.file = data['upload_model'] model.save() @@ -249,10 +275,14 @@ class DocumentTrainForm(DocumentProcessForm1): elif data.get('new_model'): # file will be created by the training process model = OcrModel.objects.create( - document=self.parts[0].document, owner=self.user, name=data['new_model'], job=model_job) + OcrModelDocument.objects.create( + document=self.parts[0].document, + ocr_model=model, + trained_on=timezone.now(), + ) else: raise forms.ValidationError( @@ -290,12 +320,24 @@ class DocumentSegtrainForm(DocumentProcessForm1): if data.get('segtrain_model'): model = data.get('segtrain_model') + ocr_model_document, created = OcrModelDocument.objects.get_or_create( + ocr_model=model, + document=self.parts[0].document, + defaults={'trained_on': timezone.now()} + ) + if not created: + ocr_model_document.trained_on = timezone.now() + ocr_model_document.save() elif data.get('upload_model'): model = OcrModel.objects.create( - document=self.parts[0].document, owner=self.user, name=data['upload_model'].name.rsplit('.', 1)[0], job=model_job) + OcrModelDocument.objects.create( + document=self.parts[0].document, + ocr_model=model, + trained_on=timezone.now(), + ) # Note: needs to save the file in a second step because the path needs the db PK model.file = data['upload_model'] model.save() @@ -303,10 +345,14 @@ class DocumentSegtrainForm(DocumentProcessForm1): elif data.get('new_model'): # file will be created by the training process model = OcrModel.objects.create( - document=self.parts[0].document, owner=self.user, name=data['new_model'], job=model_job) + OcrModelDocument.objects.create( + document=self.parts[0].document, + ocr_model=model, + trained_on=timezone.now(), + ) else: @@ -339,16 +385,28 @@ class DocumentTranscribeForm(DocumentProcessForm1): if data.get('upload_model'): model = OcrModel.objects.create( - document=self.parts[0].document, owner=self.user, name=data['upload_model'].name.rsplit('.', 1)[0], job=model_job) + OcrModelDocument.objects.create( + document=self.parts[0].document, + ocr_model=model, + executed_on=timezone.now(), + ) # Note: needs to save the file in a second step because the path needs the db PK model.file = data['upload_model'] model.save() elif data.get('ocr_model'): model = data.get('ocr_model') + ocr_model_document, created = OcrModelDocument.objects.get_or_create( + ocr_model=model, + document=self.parts[0].document, + defaults={'executed_on': timezone.now()} + ) + if not created: + ocr_model_document.executed_on = timezone.now() + ocr_model_document.save() else: raise forms.ValidationError( _("Either select a name for your new model or an existing one.")) @@ -447,12 +505,12 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): if self.document.read_direction == self.document.READ_DIRECTION_RTL: self.initial['text_direction'] = 'horizontal-rl' self.fields['binarizer'].widget.attrs['disabled'] = True - self.fields['train_model'].queryset &= OcrModel.objects.filter(document=self.document) - self.fields['segtrain_model'].queryset &= OcrModel.objects.filter(document=self.document) - self.fields['seg_model'].queryset &= OcrModel.objects.filter(document=self.document) + self.fields['train_model'].queryset &= self.document.ocr_models.all() + self.fields['segtrain_model'].queryset &= self.document.ocr_models.all() + self.fields['seg_model'].queryset &= self.document.ocr_models.all() self.fields['ocr_model'].queryset &= OcrModel.objects.filter( - Q(document=None, script=document.main_script) - | Q(document=self.document)) + Q(documents=None, script=self.document.main_script) + | Q(documents=self.document)) self.fields['transcription'].queryset = Transcription.objects.filter(document=self.document) @cached_property @@ -498,14 +556,34 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): if task == self.TASK_TRAIN and data.get('train_model'): model = data.get('train_model') + ocr_model_document, created = OcrModelDocument.objects.get_or_create( + ocr_model=model, + document=self.parts[0].document, + defaults={'trained_on': timezone.now()} + ) + if not created: + ocr_model_document.trained_on = timezone.now() + ocr_model_document.save() elif task == self.TASK_SEGTRAIN and data.get('segtrain_model'): model = data.get('segtrain_model') + ocr_model_document, created = OcrModelDocument.objects.get_or_create( + ocr_model=model, + document=self.parts[0].document, + defaults={'trained_on': timezone.now()} + ) + if not created: + ocr_model_document.trained_on = timezone.now() + ocr_model_document.save() elif data.get('upload_model'): model = OcrModel.objects.create( - document=self.parts[0].document, owner=self.user, name=data['upload_model'].name.rsplit('.', 1)[0], job=model_job) + OcrModelDocument.objects.create( + document=self.parts[0].document, + ocr_model=model, + executed_on=timezone.now(), + ) # Note: needs to save the file in a second step because the path needs the db PK model.file = data['upload_model'] model.save() @@ -513,14 +591,34 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): elif data.get('new_model'): # file will be created by the training process model = OcrModel.objects.create( - document=self.parts[0].document, owner=self.user, name=data['new_model'], job=model_job) + OcrModelDocument.objects.create( + document=self.parts[0].document, + ocr_model=model, + trained_on=timezone.now(), + ) elif data.get('ocr_model'): model = data.get('ocr_model') + ocr_model_document, created = OcrModelDocument.objects.get_or_create( + ocr_model=model, + document=self.parts[0].document, + defaults={'executed_on': timezone.now()} + ) + if not created: + ocr_model_document.executed_on = timezone.now() + ocr_model_document.save() elif data.get('seg_model'): model = data.get('seg_model') + ocr_model_document, created = OcrModelDocument.objects.get_or_create( + ocr_model=model, + document=self.parts[0].document, + defaults={'executed_on': timezone.now()} + ) + if not created: + ocr_model_document.executed_on = timezone.now() + ocr_model_document.save() else: if task in (self.TASK_TRAIN, self.TASK_SEGTRAIN): raise forms.ValidationError( diff --git a/app/apps/core/migrations/0044_auto_20210520_1332.py b/app/apps/core/migrations/0044_auto_20210520_1332.py new file mode 100644 index 0000000000000000000000000000000000000000..585d06f9074a28221f6653a57d56b4d2b19f5c0d --- /dev/null +++ b/app/apps/core/migrations/0044_auto_20210520_1332.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.20 on 2021-05-20 13:32 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0043_auto_20210324_1016'), + ] + + operations = [ + migrations.AlterField( + model_name='document', + name='read_direction', + field=models.CharField(choices=[('ltr', 'Left to right'), ('rtl', 'Right to left')], default='ltr', help_text='The read direction describes the order of the elements in the document, in opposition with the text direction which describes the order of the words in a line and is set by the script.', max_length=3), + ), + ] diff --git a/app/apps/core/migrations/0045_auto_20210521_1034.py b/app/apps/core/migrations/0045_auto_20210521_1034.py new file mode 100644 index 0000000000000000000000000000000000000000..1deb56b1d079224722452b5809392605a2aeefc8 --- /dev/null +++ b/app/apps/core/migrations/0045_auto_20210521_1034.py @@ -0,0 +1,55 @@ +# Generated by Django 2.2.19 on 2021-05-21 10:34 + +from django.db import migrations, models +import django.db.models.deletion +from django.utils import timezone + + +def populate_m2m(apps, schema_editor): + OcrModel = apps.get_model('core', 'OcrModel') + OcrModelDocument = apps.get_model('core', 'OcrModelDocument') + + OcrModelDocument.objects.bulk_create([ + OcrModelDocument( + document_id=model.document_id, + ocr_model_id=model.id, + executed_on=timezone.now(), + ) for model in OcrModel.objects.exclude(document__isnull=True) + ]) + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0044_auto_20210520_1332'), + ] + + operations = [ + migrations.CreateModel( + name='OcrModelDocument', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('trained_on', models.DateTimeField(null=True)), + ('executed_on', models.DateTimeField(null=True)), + ('document', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='ocr_model_documents', to='core.Document')), + ('ocr_model', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='ocr_model_documents', to='core.OcrModel')), + ], + options={ + 'unique_together': {('document', 'ocr_model')}, + }, + ), + migrations.AddField( + model_name='ocrmodel', + name='documents', + field=models.ManyToManyField(related_name='ocr_models', through='core.OcrModelDocument', to='core.Document'), + ), + migrations.RunPython( + populate_m2m, + reverse_code=migrations.RunPython.noop, + ), + migrations.RemoveField( + model_name='ocrmodel', + name='document', + ), + ] diff --git a/app/apps/core/migrations/0046_auto_20210521_1444.py b/app/apps/core/migrations/0046_auto_20210521_1444.py new file mode 100644 index 0000000000000000000000000000000000000000..f2b7053905979a64a65fcf8ed6d94c8cf1f4894e --- /dev/null +++ b/app/apps/core/migrations/0046_auto_20210521_1444.py @@ -0,0 +1,43 @@ +# Generated by Django 2.2.20 on 2021-05-21 14:44 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('auth', '0011_update_proxy_permissions'), + ('core', '0045_auto_20210521_1034'), + ] + + operations = [ + migrations.AlterField( + model_name='document', + name='workflow_state', + field=models.PositiveSmallIntegerField(choices=[(0, 'Draft'), (2, 'Published'), (3, 'Archived')], default=0), + ), + migrations.CreateModel( + name='Project', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=512)), + ('slug', models.SlugField(unique=True)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('updated_at', models.DateTimeField(auto_now=True)), + ('owner', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)), + ('shared_with_groups', models.ManyToManyField(blank=True, related_name='shared_projects', to='auth.Group', verbose_name='Share with teams')), + ('shared_with_users', models.ManyToManyField(blank=True, related_name='shared_projects', to=settings.AUTH_USER_MODEL, verbose_name='Share with users')), + ], + options={ + 'ordering': ('-updated_at',), + }, + ), + migrations.AddField( + model_name='document', + name='project', + field=models.ForeignKey(null=True, on_delete=django.db.models.deletion.CASCADE, related_name='documents', to='core.Project'), + ), + ] diff --git a/app/apps/core/migrations/0047_datamigration_share_doc_to_proj.py b/app/apps/core/migrations/0047_datamigration_share_doc_to_proj.py new file mode 100644 index 0000000000000000000000000000000000000000..d42de99d2ee4210dd6473d8c21a8cf0812fe65fe --- /dev/null +++ b/app/apps/core/migrations/0047_datamigration_share_doc_to_proj.py @@ -0,0 +1,81 @@ +# Generated by Django 2.2.20 on 2021-05-10 13:06 +import time + +from django.db import migrations +from django.template.defaultfilters import slugify + + +def make_slug(proj, Project): + # models in migrations don't have access to models methods ;( + slug = slugify(proj.name) + # check unicity + exists = Project.objects.filter(slug=slug).count() + if not exists: + proj.slug = slug + else: + proj.slug = slug[:40] + hex(int(time.time()))[2:] + + proj.save() + + +def forwards(apps, schema_editor): + User = apps.get_model('users', 'User') + Project = apps.get_model('core', 'Project') + Document = apps.get_model('core', 'Document') + # create user projects + for user in User.objects.all(): + proj, created = Project.objects.get_or_create(name=user.username+"'s Project", + owner=user) + if not proj.slug: + make_slug(proj, Project) + # move documents into projects + user.document_set.update(project=proj) + # move share from docs to created projects + for doc in user.document_set.all(): + for share in doc.shared_with_users.all(): + proj.shared_with_users.add(share) + for share in doc.shared_with_groups.all(): + proj.shared_with_groups.add(share) + + # shared to draft + user.document_set.filter(workflow_state=1).update(workflow_state=0) + + # deal with documents without owner (shouldn't be any but let's be safe) + # move them to admin's + user = User.objects.filter(is_superuser=True).first() + proj, dummy = Project.objects.get_or_create(name=user.username+"'s Project", + owner=user) + if not proj.slug: + make_slug(proj, Project) + for doc in Document.objects.filter(owner=None): + doc.project = proj + doc.save() + # move share from docs to created projects + for doc in user.document_set.all(): + for share in doc.shared_with_users.all(): + proj.shared_with_users.add(share) + for share in doc.shared_with_groups.all(): + proj.shared_with_groups.add(share) + + +def backwards(apps, schema_editor): + Document = apps.get_model('core', 'Document') + for doc in Document.objects.all(): + if doc.project: + for share in doc.project.shared_with_users.all(): + doc.shared_with_users.add(share) + for share in doc.project.shared_with_groups.all(): + doc.shared_with_groups.add(share) + + Document.objects.update(project=None) + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0046_auto_20210521_1444'), + ] + + operations = [ + migrations.RunPython(forwards, backwards), + ] diff --git a/app/apps/core/migrations/0048_auto_20210521_1445.py b/app/apps/core/migrations/0048_auto_20210521_1445.py new file mode 100644 index 0000000000000000000000000000000000000000..4e410d45fd57cb3ba813f08f318998a6554068c5 --- /dev/null +++ b/app/apps/core/migrations/0048_auto_20210521_1445.py @@ -0,0 +1,27 @@ +# Generated by Django 2.2.20 on 2021-05-21 14:45 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0047_datamigration_share_doc_to_proj'), + ] + + operations = [ + migrations.RemoveField( + model_name='document', + name='shared_with_groups', + ), + migrations.RemoveField( + model_name='document', + name='shared_with_users', + ), + migrations.AlterField( + model_name='document', + name='project', + field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='documents', to='core.Project'), + ), + ] diff --git a/app/apps/core/models.py b/app/apps/core/models.py index 2269c8675c6f8fb2448bed472972b36c5e4958c6..a9fb102b2b42ee563a1b0402b2654604d38f8348 100644 --- a/app/apps/core/models.py +++ b/app/apps/core/models.py @@ -1117,7 +1117,7 @@ class LineTranscription(Versioned, models.Model): def models_path(instance, filename): fn, ext = os.path.splitext(filename) - return 'models/%d/%s%s' % (instance.document.pk, slugify(fn), ext) + return 'models/%d/%s%s' % (instance.owner.pk, slugify(fn), ext) class OcrModel(Versioned, models.Model): @@ -1139,12 +1139,12 @@ class OcrModel(Versioned, models.Model): training_accuracy = models.FloatField(default=0.0) training_total = models.IntegerField(default=0) training_errors = models.IntegerField(default=0) - document = models.ForeignKey(Document, - related_name='ocr_models', - default=None, on_delete=models.CASCADE) + documents = models.ManyToManyField(Document, + through='core.OcrModelDocument', + related_name='ocr_models') script = models.ForeignKey(Script, blank=True, null=True, on_delete=models.SET_NULL) - version_ignore_fields = ('name', 'owner', 'document', 'script', 'training') + version_ignore_fields = ('name', 'owner', 'documents', 'script', 'training') version_history_max_length = None # keep em all class Meta: @@ -1160,6 +1160,7 @@ class OcrModel(Versioned, models.Model): def segtrain(self, document, parts_qs, user=None): segtrain.delay(self.pk, + document.pk, list(parts_qs.values_list('pk', flat=True)), user_pk=user and user.pk or None) @@ -1212,6 +1213,17 @@ class OcrModel(Versioned, models.Model): super().delete_revision(revision) +class OcrModelDocument(models.Model): + document = models.ForeignKey(Document, on_delete=models.CASCADE, related_name='ocr_model_documents') + ocr_model = models.ForeignKey(OcrModel, on_delete=models.CASCADE, related_name='ocr_model_documents') + created_at = models.DateTimeField(auto_now_add=True) + trained_on = models.DateTimeField(null=True) + executed_on = models.DateTimeField(null=True) + + class Meta: + unique_together = (('document', 'ocr_model'),) + + @receiver(pre_delete, sender=DocumentPart, dispatch_uid='thumbnails_delete_signal') def delete_thumbnails(sender, instance, using, **kwargs): thumbnailer = get_thumbnailer(instance.image) diff --git a/app/apps/core/tasks.py b/app/apps/core/tasks.py index e0b37852df101755f1c90a5450114252ade8c9dc..b10c637c4bdbd48ae84f5dc4214fafc1eee90a93 100644 --- a/app/apps/core/tasks.py +++ b/app/apps/core/tasks.py @@ -3,6 +3,7 @@ import json import logging import numpy as np import os.path +import pathlib import shutil from itertools import groupby @@ -129,7 +130,7 @@ def make_segmentation_training_data(part): @shared_task(bind=True, autoretry_for=(MemoryError,), default_retry_delay=60 * 60) -def segtrain(task, model_pk, part_pks, user_pk=None): +def segtrain(task, model_pk, document_pk, part_pks, user_pk=None): # # Note hack to circumvent AssertionError: daemonic processes are not allowed to have children from multiprocessing import current_process current_process().daemon = False @@ -165,8 +166,7 @@ def segtrain(task, model_pk, part_pks, user_pk=None): try: model.training = True model.save() - document = model.document - send_event('document', document.pk, "training:start", { + send_event('document', document_pk, "training:start", { "id": model.pk, }) qs = DocumentPart.objects.filter(pk__in=part_pks) @@ -212,7 +212,7 @@ def segtrain(task, model_pk, part_pks, user_pk=None): model.new_version(file=new_version_filename) model.save() - send_event('document', document.pk, "training:eval", { + send_event('document', document_pk, "training:eval", { "id": model.pk, 'versions': model.versions, 'epoch': epoch, @@ -233,7 +233,7 @@ def segtrain(task, model_pk, part_pks, user_pk=None): id="seg-no-gain-error", level='danger') except Exception as e: - send_event('document', document.pk, "training:error", { + send_event('document', document_pk, "training:error", { "id": model.pk, }) if user: @@ -250,7 +250,7 @@ def segtrain(task, model_pk, part_pks, user_pk=None): model.training = False model.save() - send_event('document', document.pk, "training:done", { + send_event('document', document_pk, "training:done", { "id": model.pk, }) @@ -352,8 +352,7 @@ def train_(qs, document, transcription, model=None, user=None): filename = slugify(model.name) + '.mlmodel' upload_to = model.file.field.upload_to(model, filename) fulldir = os.path.join(settings.MEDIA_ROOT, os.path.split(upload_to)[0], '') - if not os.path.exists(fulldir): - os.mkdir(fulldir) + pathlib.Path(fulldir).mkdir(parents=True, exist_ok=True) modelpath = os.path.join(fulldir, filename) model.file = upload_to model.save() diff --git a/app/apps/core/tests/factory.py b/app/apps/core/tests/factory.py index 98ffc303894f59f7dbb7ab3f4de9dd54740a00d0..f80e24ee9a92a8865e8555e6802f0be02610c3c9 100644 --- a/app/apps/core/tests/factory.py +++ b/app/apps/core/tests/factory.py @@ -6,6 +6,7 @@ import os.path from django.conf import settings from django.core.files.uploadedfile import SimpleUploadedFile from django.test import TestCase +from django.utils import timezone from django_redis import get_redis_connection from kraken.lib import vgsl @@ -86,13 +87,15 @@ class CoreFactory(): fp = os.path.join(os.path.dirname(__file__), 'assets', asset_name) return open(fp, 'rb') - def make_model(self, job=OcrModel.MODEL_JOB_RECOGNIZE, document=None): + def make_model(self, document, job=OcrModel.MODEL_JOB_RECOGNIZE): spec = '[1,48,0,1 Lbx100 Do O1c10]' nn = vgsl.TorchVGSLModel(spec) model_name = 'test-model.mlmodel' model = OcrModel.objects.create(name=model_name, - document=document, + owner=document.owner, job=job) + + document.ocr_models.add(model) modeldir = os.path.join(settings.MEDIA_ROOT, os.path.split( model.file.field.upload_to(model, 'test-model.mlmodel'))[0]) if not os.path.exists(modeldir): diff --git a/app/apps/core/tests/tasks.py b/app/apps/core/tests/tasks.py index 00087bb60f68fc7fbc037e04cb2f2075f2851040..10ebf07c65ec5f4644b93c3c4333b058b89f49a2 100644 --- a/app/apps/core/tests/tasks.py +++ b/app/apps/core/tests/tasks.py @@ -68,7 +68,7 @@ class TasksTestCase(CoreFactoryTestCase): def test_train_existing_transcription_model(self): self.makeTranscriptionContent() - model = self.factory.make_model(document=self.part.document) + model = self.factory.make_model(self.part.document) self.client.force_login(self.part.document.owner) uri = reverse('document-parts-process', kwargs={'pk': self.part.document.pk}) with self.assertNumQueries(17): diff --git a/app/apps/core/views.py b/app/apps/core/views.py index 00cfc0fa20250ecad7d49501db2d199593e8aabb..d8cfd458e7ce1edf8f7b27949464e1f0a9cf1fd6 100644 --- a/app/apps/core/views.py +++ b/app/apps/core/views.py @@ -307,7 +307,7 @@ class ModelsList(LoginRequiredMixin, ListView): self.document = Document.objects.for_user(self.request.user).get(pk=self.kwargs.get('document_pk')) except Document.DoesNotExist: raise PermissionDenied - return OcrModel.objects.filter(document=self.document) + return self.document.ocr_models.all() else: self.document = None return OcrModel.objects.filter(owner=self.request.user) diff --git a/app/apps/imports/parsers.py b/app/apps/imports/parsers.py index cfda3f0fe2859fdf14aad34079b0521c1c20e645..340cb723fbe8352034fa033120ca23f08b54635d 100644 --- a/app/apps/imports/parsers.py +++ b/app/apps/imports/parsers.py @@ -67,14 +67,16 @@ class ParserDocument: class PdfParser(ParserDocument): + def __init__(self, document, file_handler, report): + super().__init__(document, file_handler, report) + pyvips.voperation.cache_set_max(10) # 0 = no parallelisation at all; default is 1000 + def validate(self): try: - self.doc = pyvips.Image.new_from_buffer(self.file.read(), "", - dpi=300, n=-1, - access="sequential") + self.doc = pyvips.Image.pdfload_buffer(self.file.read(), n=-1, access='sequential') except pyvips.error.Error as e: logger.exception(e) - raise ParseError(_("Invalid pdf file.")) + raise ParseError(_("Invalid PDF file.")) @property def total(self): @@ -85,17 +87,16 @@ class PdfParser(ParserDocument): def parse(self, start_at=0, override=False, user=None): buff = self.file.read() - doc = pyvips.Image.new_from_buffer(buff, "", - dpi=300, n=-1, - access="sequential") + doc = pyvips.Image.pdfload_buffer(buff, n=-1, access='sequential') n_pages = doc.get('n-pages') try: for page_nb in range(start_at, n_pages): - page = pyvips.Image.new_from_buffer(buff, "", dpi=300, - access="sequential", - page=page_nb) + page = pyvips.Image.pdfload_buffer(buff, + page=page_nb, + dpi=300, + access='sequential') part = DocumentPart(document=self.document) - fname = '%s_page_%d.png' % (self.file.name, page_nb+1) + fname = '%s_page_%d.png' % (self.file.name.rsplit('/')[-1], page_nb+1) part.image.save(fname, ContentFile(page.write_to_buffer('.png'))) part.original_filename = fname part.save() diff --git a/app/apps/users/urls.py b/app/apps/users/urls.py index e3aa844ec11047910b94461f1c68f1593fadcdae..fe7530b32fee6445352088bf05dfde2cb79cddc9 100644 --- a/app/apps/users/urls.py +++ b/app/apps/users/urls.py @@ -1,7 +1,8 @@ from django.urls import path, include from users.views import (SendInvitation, AcceptInvitation, AcceptGroupInvitation, ContactUsView, - ProfileInfos, ProfileGroupListCreate, ProfileApiKey, ProfileFiles, + ProfileInfos, ProfileGroupListCreate, ProfileApiKey, + ProfileFiles, ProfileInvitations, GroupDetail, RemoveFromGroup, LeaveGroup, TransferGroupOwnership) from django.contrib.auth.decorators import permission_required @@ -11,6 +12,7 @@ urlpatterns = [ path('profile/apikey/', ProfileApiKey.as_view(), name='profile-api-key'), path('profile/files/', ProfileFiles.as_view(), name='profile-files'), path('profile/teams/', ProfileGroupListCreate.as_view(), name='profile-team-list'), + path('profile/invitations/', ProfileInvitations.as_view(), name='profile-invites-list'), path('teams/<int:pk>/', GroupDetail.as_view(), name='team-detail'), path('teams/<int:pk>/remove/', RemoveFromGroup.as_view(), name='team-remove-user'), path('teams/<int:pk>/leave/', LeaveGroup.as_view(), name='team-leave'), diff --git a/app/apps/users/views.py b/app/apps/users/views.py index 9eb56f23eeca70d38f8f6f06788949cbd5eb7442..03cd6379dd38bf3034fcdabce3255a8d71243bfa 100644 --- a/app/apps/users/views.py +++ b/app/apps/users/views.py @@ -189,6 +189,19 @@ class ProfileFiles(LoginRequiredMixin, TemplateView): return context +class ProfileInvitations(LoginRequiredMixin, TemplateView): + template_name = 'users/profile_invitations.html' + + def get_context_data(self, *args, **kwargs): + context = super().get_context_data(*args, **kwargs) + invites = self.request.user.invitations_sent.all() + paginator = Paginator(invites, 25) + context['is_paginated'] = paginator.count != 0 + page_number = self.request.GET.get('page') + context['page_obj'] = paginator.get_page(page_number) + return context + + class ProfileGroupListCreate(LoginRequiredMixin, SuccessMessageMixin, CreateView): """ Both were we create new groups and list them diff --git a/app/escriptorium/templates/core/document_part_edit.html b/app/escriptorium/templates/core/document_part_edit.html index cb47e1a43f0ff17484bc806b106c5d729aa242fd..0cb1a7128de5f7ab355736a8fe435217f81bc1af 100644 --- a/app/escriptorium/templates/core/document_part_edit.html +++ b/app/escriptorium/templates/core/document_part_edit.html @@ -4,12 +4,12 @@ {% block body %} <div id="editor"> - <editor :document-id="'{{document.id}}'" - :document-name="'{{document.name}}'" - :part-id="'{{part.id}}'" - :default-text-direction="'{{ document.default_text_direction }}'" - :main-text-direction="'{{ document.main_script.text_direction }}'" - :read-direction="'{{ document.read_direction }}'"> + <editor :document-id="'{{document.id}}'" + :document-name="'{{ document.name|escapejs }}'" + :part-id="'{{part.id}}'" + :default-text-direction="'{{ document.default_text_direction }}'" + :main-text-direction="'{{ document.main_script.text_direction }}'" + :read-direction="'{{ document.read_direction }}'"> <a href="{% if object %}{% url 'document-update' pk=document.pk %}{% endif %}" class="nav-item nav-link {% block nav-doc-active %}{% endblock %}" id="nav-doc-tab" role="tab" aria-controls="nav-doc" aria-selected="true">{% trans "Description" %}</a> <a href="{% if object %}{% url 'document-images' pk=document.pk %}{% else %}#{% endif %}" class="nav-item nav-link {% if not object %}disabled{% endif %} {% block nav-images-active %}{% endblock %}" id="nav-img-tab" role="tab" aria-controls="nav-img" aria-selected="true">{% trans "Images" %}</a> <a href="{% if document %}{% url 'document-part-edit' pk=document.pk %}{% else %}#{% endif %}" class="nav-item nav-link active {% if not object or not document.parts.count %}disabled{% endif %}" id="nav-edit-tab" role="tab" aria-controls="nav-doc" aria-selected="true">{% trans "Edit" %}</a> diff --git a/app/escriptorium/templates/users/profile.html b/app/escriptorium/templates/users/profile.html index 7d950acd7edff5bab0ef3acf7f8f1589718b72f6..eb4f04990abeb040adee7c241e796a16b3f976b3 100644 --- a/app/escriptorium/templates/users/profile.html +++ b/app/escriptorium/templates/users/profile.html @@ -9,6 +9,7 @@ <a class="nav-link {% block key-tab-active %}{% endblock %}" id="nav-key-tab" href="{% url 'profile-api-key' %}" role="tab">{% trans "Api key" %}</a> <a class="nav-link {% block files-tab-active %}{% endblock %}" id="nav-files-tab" href="{% url 'profile-files' %}" role="tab">{% trans "Files" %}</a> <a class="nav-link {% block team-tab-active %}{% endblock %}" id="nav-infos-tab" href="{% url 'profile-team-list' %}" role="tab">{% trans "Teams" %}</a> + <a class="nav-link {% block invites-tab-active %}{% endblock %}" id="nav-infos-tab" href="{% url 'profile-invites-list' %}" role="tab">{% trans "Invitations" %}</a> </div> <div class="col-md-8 tab-content" id="v-pills-tabContent"> diff --git a/app/escriptorium/templates/users/profile_api_key.html b/app/escriptorium/templates/users/profile_api_key.html index 48e1fe1d5cbb66397545879cc9a0fa2acce3d709..59f1529846168610bf208627c5726f3e28476b4a 100644 --- a/app/escriptorium/templates/users/profile_api_key.html +++ b/app/escriptorium/templates/users/profile_api_key.html @@ -3,8 +3,6 @@ {% block infos-tab-active %}{% endblock %} {% block key-tab-active %}active{% endblock %} -{% block files-tab-active %}{% endblock %} -{% block team-tab-active %}{% endblock %} {% block tab-content %} {% trans "API Authentication Token:" %} {{ api_auth_token.key }} diff --git a/app/escriptorium/templates/users/profile_files.html b/app/escriptorium/templates/users/profile_files.html index 9849d42183ac7c6a189a0f0ff225190301ccb432..a4277c5a9b15299ae8087e4768cae7e9779fbafc 100644 --- a/app/escriptorium/templates/users/profile_files.html +++ b/app/escriptorium/templates/users/profile_files.html @@ -2,9 +2,7 @@ {% load i18n static %} {% block infos-tab-active %}{% endblock %} -{% block key-tab-active %}{% endblock %} {% block files-tab-active %}active{% endblock %} -{% block team-tab-active %}{% endblock %} {% block tab-content %} {% for fpath, fname in page_obj %} diff --git a/app/escriptorium/templates/users/profile_group_list.html b/app/escriptorium/templates/users/profile_group_list.html index 3a7b855f17bd40eab0a437e2a3a1458c85a5ff31..728f47a4b08703bfdba6f47ebee51a410102d9f6 100644 --- a/app/escriptorium/templates/users/profile_group_list.html +++ b/app/escriptorium/templates/users/profile_group_list.html @@ -2,12 +2,9 @@ {% load i18n bootstrap static %} {% block infos-tab-active %}{% endblock %} -{% block key-tab-active %}{% endblock %} -{% block files-tab-active %}{% endblock %} {% block team-tab-active %}active{% endblock %} {% block tab-content %} - <h4>{% trans "Create a new Team" %}</h4> <form method="post"> {% csrf_token %} diff --git a/app/escriptorium/templates/users/profile_invitations.html b/app/escriptorium/templates/users/profile_invitations.html new file mode 100644 index 0000000000000000000000000000000000000000..60be1c3a9718c12709ea15767ae84106d7fb1b26 --- /dev/null +++ b/app/escriptorium/templates/users/profile_invitations.html @@ -0,0 +1,21 @@ +{% extends "users/profile.html" %} +{% load i18n static %} + +{% block infos-tab-active %}{% endblock %} +{% block invites-tab-active %}active{% endblock %} + +{% block tab-content %} +<table class="table"> +{% for invite in page_obj %} + <tr> + <td>{{invite.recipient_email|default:invite.recipient}}</td> + <td title="Into group">{{invite.group|default:""}}</td> + <td>{{invite.get_workflow_state_display}}</td> + </tr> +{% empty %} +{% trans "You didn't send any invitations yet." %} +{% endfor %} +</table> + +{% include "includes/pagination.html" %} +{% endblock %} diff --git a/app/requirements.txt b/app/requirements.txt index 578411a956c52c1b8fe894d5f9f3482d30970226..cb1e62c76e846723fabffbe31e541d9e96e63c70 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -12,7 +12,7 @@ django-redis==4.10.0 psycopg2-binary==2.7.6 django-ordered-model==3.1.1 easy-thumbnails==2.5 -git+https://github.com/mittagessen/kraken.git@3.0b23#egg=kraken +git+https://github.com/mittagessen/kraken.git@3.0b24#egg=kraken django-cleanup==5.1.0 djangorestframework==3.9.2 drf-nested-routers==0.91 diff --git a/docker-compose.override.yml_example b/docker-compose.override.yml_example index 706d9693becda40e375477fa80e6643d964c879e..4e5e10e99352c1a29897c9265f9fce7188894196 100644 --- a/docker-compose.override.yml_example +++ b/docker-compose.override.yml_example @@ -3,8 +3,6 @@ version: "3.9" services: ### to customize the homepage, uncomment this #app: - # environment: - # - CUSTOM_HOME=True # volumes: # - $PWD/app/homepage diff --git a/docker-compose.yml b/docker-compose.yml index f8eed4a90277e5ea3f0283b2f06054db2e19b586..e596ec99fcef606d30031fc732f7a3837c050596 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,7 +18,6 @@ services: expose: - 8000 - channelserver: <<: *app command: daphne --bind 0.0.0.0 --port 5000 -v 1 escriptorium.asgi:application diff --git a/front/vue/components/DiploPanel.vue b/front/vue/components/DiploPanel.vue index 82409240c3d97e54fa341ce7953f0d8dbfc8d93d..e14f4e9e822f353341cdd6264126479e68fb4065 100644 --- a/front/vue/components/DiploPanel.vue +++ b/front/vue/components/DiploPanel.vue @@ -87,6 +87,8 @@ export default Vue.extend({ }.bind(this)); this.refresh(); + + }, methods: { empty() { @@ -250,35 +252,143 @@ export default Vue.extend({ tmp.remove(); return clean; }, + onPaste(e) { - let pastedData = e.clipboardData.getData('text/plain'); - let pasted_data_split = pastedData.split('\n'); + + let diplomaticLines=document.querySelector("#diplomatic-lines"); + let sel = window.getSelection(); + let tmpDiv = document.createElement('div'); - if (pasted_data_split.length == 1) { - let content = this.cleanSource(pastedData); - document.execCommand('insertText', false, content); - } else { - const selection = window.getSelection(); - let range = selection.getRangeAt(0); - let target = range.startContainer.nodeType==Node.TEXT_NODE?range.startContainer.parentNode:range.startContainer; - let start = Array.prototype.indexOf.call(this.$refs.diplomaticLines.children, target); - let newDiv, child = this.$refs.diplomaticLines.children[start]; - for (let i = 0; i < pasted_data_split.length; i++) { - newDiv = this.appendLine(child); - newDiv.textContent = this.cleanSource(pasted_data_split[i]); - // trick to get at least 'some' ctrl+z functionality - // this fails in spectacular ways differently in firefox and chrome... so no ctrl+z - /* range.setStart(newDiv, 0); - * selection.removeAllRanges(); - * selection.addRange(range); - * document.execCommand("insertText", false, this.cleanSource(content)); */ + let pastedData; + if (e && e.clipboardData && e.clipboardData.types && e.clipboardData.getData) { + let types = e.clipboardData.types; + if (((types instanceof DOMStringList) && types.contains("text/html")) || (types.indexOf && types.indexOf('text/html') !== -1)) { + let content = e.clipboardData.getData('text/html'); + tmpDiv.innerHTML = content; + pastedData = [ ...tmpDiv.childNodes].map(e=>e.textContent).join('\n'); + } else { + pastedData = e.clipboardData.getData('text/plain'); + } + + var cursor = sel.getRangeAt(0); // specific posiiton or range + // for a range, delete content to clean data and to get resulting specific cursor position from it: + cursor.deleteContents(); // if selection is done on several lines, cursor caret be placed between 2 divs + + // after deleting (for an range), + // check if resulting cursor is in or off a line div or some errors will occur!: + let parentEl = sel.getRangeAt(0).commonAncestorContainer; + if (parentEl.nodeType != 1) { + parentEl = parentEl.parentNode; // for several different lines, commonAncestorContainer does not exist + } + + let pasted_data_split = pastedData.split("\n"); + let refNode = parentEl; + + let textBeforeCursor = ''; + let textAfterCursor = ''; + + // nodes which will be placed before and after the targetnode - where text is pasted (new node or current node) + let prevSibling; + let nextSibling; + + if(parentEl.id == 'diplomatic-lines'){ // if parent node IS the main diplomatic panel div = cursor is offline + // occurs when a selection is made on several lines or all is selected + + //we create a between node: + refNode = document.createElement('div'); + refNode.textContent = ''; + + // paste text on the selection (cursor position or range): + cursor.insertNode(refNode); + + // set caret position/place the cursor into the new node: + cursor.setStart(refNode,0); + cursor.setEnd(refNode,0); + + // in this case, contents before and after selection will belong to near siblings + if(refNode.previousSibling != null){ + prevSibling = refNode.previousSibling; + } + if(refNode.nextSibling != null){ + nextSibling = refNode.nextSibling; + } + } + + // get current cursor position withing the line div tag + let caretPos = cursor.endOffset; // 4 // nombre de caractères du début jusqu'à la position du curseur + + // store previous and next text in the line to it / for a selection wihtin on line: + textBeforeCursor = refNode.textContent.substring(0, caretPos); + textAfterCursor = refNode.textContent.substring(caretPos, refNode.textContent.length); + + // for a selection between several lines, contents before and after will be the contents of siblings + // to avoid create new lines before and after, fusion of sibling contents to the current node and removing it + if(typeof(prevSibling) != "undefined"){ + textBeforeCursor = prevSibling.textContent; + prevSibling.parentNode.removeChild(prevSibling); + } + if(typeof(nextSibling) != "undefined"){ + textAfterCursor = nextSibling.textContent; + nextSibling.parentNode.removeChild(nextSibling); + } + + let endPos = 0; // will set the new cursor position + let lastTargetNode = refNode; // last impacted node for a copy-paste (for several lines) + + if(pasted_data_split.length == 1){ + refNode.textContent = textBeforeCursor + pasted_data_split[0] + textAfterCursor; + endPos = String(textBeforeCursor + pasted_data_split[0]).length; + } + else{ + // store resulting firstLine & lastLine contents regarding cursor position + let firstLine = textBeforeCursor + pasted_data_split[0]; + let lastLine = pasted_data_split[pasted_data_split.length -1] + textAfterCursor; + let nextNodesContents = new Array(); + + for(var j=0; j < pasted_data_split.length; j++) + { + var lineContent = pasted_data_split[j]; + if(j == 0) + lineContent = firstLine; + if(j == pasted_data_split.length-1) + lineContent = lastLine; + nextNodesContents.push(lineContent); + } + // get length of last pasted line to set new caret position + endPos = String(pasted_data_split[pasted_data_split.length-1]).length; + + refNode.textContent = nextNodesContents[nextNodesContents.length-1]; + lastTargetNode = refNode; + + nextNodesContents = nextNodesContents.reverse(); + + for(var j=1; j < nextNodesContents.length; j++) // for any other line, we add a div and set this content + { + var prevLineDiv = document.createElement('div'); + prevLineDiv.textContent = nextNodesContents[j]; + // add the new line as a next neighbor of current div: + refNode = diplomaticLines.insertBefore(prevLineDiv, refNode); + } } + // set the caret position right after the pasted content: + + if(typeof(lastTargetNode.childNodes[0]) != "undefined") + { + let textNode = lastTargetNode.childNodes[0]; + cursor.setStart(textNode, endPos); + } + + } else { + // not sure if this can actually happen in firefox/chrome?! + pastedData = ""; + // so we do nothing; keeping original content } - this.$refs.saveNotif.classList.remove('hide'); - this.constrainLineNumber(); + // Stop the data from actually being pasted // without it will paste the native copied text after "content" + e.stopPropagation(); e.preventDefault(); }, + showOverlay(ev) { let target = ev.target.nodeType==Node.TEXT_NODE?ev.target.parentNode:ev.target; let index = Array.prototype.indexOf.call(target.parentNode.children, target); diff --git a/front/vue/components/TranscriptionModal.vue b/front/vue/components/TranscriptionModal.vue index a610323983bab8e97eb5a8cef0833128e32bdcdb..9cfb95a8b41b5cece45d5c526c73168258d4c7bc 100644 --- a/front/vue/components/TranscriptionModal.vue +++ b/front/vue/components/TranscriptionModal.vue @@ -10,7 +10,7 @@ <button v-if="$store.state.document.readDirection == 'rtl'" type="button" id="next-btn" - @click="$store.dispatch('lines/editLine', 'next')" + @click="editLine('next')" title="Next (up arrow)" class="btn btn-sm mr-1 btn-secondary"> <i class="fas fa-arrow-circle-left"></i> @@ -18,7 +18,7 @@ <button v-else type="button" id="prev-btn" - @click="$store.dispatch('lines/editLine', 'previous')" + @click="editLine('previous')" title="Previous (up arrow)" class="btn btn-sm mr-1 btn-secondary"> <i class="fas fa-arrow-circle-left"></i> @@ -27,7 +27,7 @@ <button v-if="$store.state.document.readDirection == 'rtl'" type="button" id="prev-btn" - @click="$store.dispatch('lines/editLine', 'previous')" + @click="editLine('previous')" title="Previous (down arrow)" class="btn btn-sm mr-1 btn-secondary"> <i class="fas fa-arrow-circle-right"></i> @@ -35,7 +35,7 @@ <button v-else type="button" id="next-btn" - @click="$store.dispatch('lines/editLine', 'next')" + @click="editLine('next')" title="Next (down arrow)" class="btn btn-sm mr-1 btn-secondary"> <i class="fas fa-arrow-circle-right"></i> @@ -71,9 +71,9 @@ <div id="trans-input-container" ref="transInputContainer"> <input v-if="$store.state.document.mainTextDirection != 'ttb'" - v-on:keyup.down="$store.dispatch('lines/editLine', 'next')" - v-on:keyup.up="$store.dispatch('lines/editLine', 'previous')" - v-on:keyup.enter="$store.dispatch('lines/editLine', 'next')" + v-on:keyup.down="editLine('next')" + v-on:keyup.up="editLine('previous')" + v-on:keyup.enter="editLine('next')" id="trans-input" ref="transInput" name="content" @@ -95,9 +95,9 @@ <div id="textInputBorderWrapper" class="form-control mb-2"> <div v-on:blur="localTranscription = $event.target.textContent" v-on:keyup="recomputeInputCharsScaleY()" - v-on:keyup.right="$store.dispatch('lines/editLine', 'next')" - v-on:keyup.left="$store.dispatch('lines/editLine', 'previous')" - v-on:keyup.enter="cleanHTMLTags();recomputeInputCharsScaleY();$store.dispatch('lines/editLine', 'next')" + v-on:keyup.right="editLine('next')" + v-on:keyup.left="editLine('previous')" + v-on:keyup.enter="cleanHTMLTags();recomputeInputCharsScaleY();editLine('next')" v-html="localTranscription" id="vertical_text_input" contenteditable="true"> @@ -283,6 +283,13 @@ export default Vue.extend({ close() { $(this.$refs.transModal).modal('hide'); }, + + editLine(direction) { + // making sure the line is saved (it isn't in case of shortcut usage) + this.localTranscription = this.$refs.transInput.value; + this.$store.dispatch('lines/editLine', direction); + }, + cleanHTMLTags(){ document.getElementById("vertical_text_input").innerHTML = document.getElementById("vertical_text_input").textContent; }, diff --git a/variables.env_example b/variables.env_example index 69e9c245e24702af7af536471100947411926c2d..e0b4656925645061a7faa5ac914d4be43a0b0865 100644 --- a/variables.env_example +++ b/variables.env_example @@ -20,3 +20,5 @@ FLOWER_BASIC_AUTH=flower:changeme # set shm_size in yml file! KRAKEN_TRAINING_LOAD_THREADS=8 + +# CUSTOM_HOME=True