diff --git a/app/apps/core/forms.py b/app/apps/core/forms.py index 167f6cb7c8c7e5fa6b7b3856fbffcf32fe6fc6d6..6d5973b35a0246fc9e29a4375240b4894e377f7d 100644 --- a/app/apps/core/forms.py +++ b/app/apps/core/forms.py @@ -120,6 +120,8 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): ) segmentation_steps = forms.ChoiceField(choices=SEGMENTATION_STEPS_CHOICES, initial='lines', required=False) + seg_model = forms.ModelChoiceField(queryset=OcrModel.objects.filter(job=OcrModel.MODEL_JOB_SEGMENT), + label=_("Model"), required=False) override = forms.BooleanField(required=False, initial=False, help_text=_("If checked, deletes existing segmentation <b>and bound transcriptions</b> first!")) TEXT_DIRECTION_CHOICES = (('horizontal-lr', _("Horizontal l2r")), @@ -142,8 +144,6 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): transcription = forms.ModelChoiceField(queryset=Transcription.objects.all(), required=False) # segtrain - seg_model = forms.ModelChoiceField(queryset=OcrModel.objects.filter(job=OcrModel.MODEL_JOB_SEGMENT), - label=_("Model"), required=False) segtrain_model = forms.ModelChoiceField(queryset=OcrModel.objects.filter(job=OcrModel.MODEL_JOB_SEGMENT), label=_("Model"), required=False) @@ -162,6 +162,7 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): self.fields['binarizer'].widget.attrs['disabled'] = True self.fields['train_model'].queryset &= OcrModel.objects.filter(document=self.document) self.fields['segtrain_model'].queryset &= OcrModel.objects.filter(document=self.document) + self.fields['seg_model'].queryset &= OcrModel.objects.filter(document=self.document) self.fields['ocr_model'].queryset = OcrModel.objects.filter( Q(document=None, script=document.main_script) | Q(document=self.document)) @@ -200,7 +201,9 @@ class DocumentProcessForm(BootstrapFormMixin, forms.Form): data = super().clean() task = data.get('task') - if task == self.TASK_SEGTRAIN: + if task == self.TASK_SEGMENT: + model_job = OcrModel.MODEL_JOB_SEGMENT + elif task == self.TASK_SEGTRAIN: model_job = OcrModel.MODEL_JOB_SEGMENT if len(self.parts) < 2: raise forms.ValidationError("Segmentation training requires at least 2 images.") diff --git a/app/apps/core/models.py b/app/apps/core/models.py index d40d81db48e9d9a963e740676656e7832a14a5f5..dba2bdaa45bf28456bfa4fc394adbc793069aa76 100644 --- a/app/apps/core/models.py +++ b/app/apps/core/models.py @@ -583,27 +583,28 @@ class DocumentPart(OrderedModel): options['model'] = model.file.path else: options['model'] = settings.KRAKEN_DEFAULT_SEGMENTATION_MODEL - blocks = self.blocks.all() - if blocks: - for block in blocks: - if block.box[2] < block.box[0] + 10 or block.box[3] < block.box[1] + 10: - continue - ic = im.crop(block.box) - res = blla.segment(ic, **options) - # if script_detect: - # res = pageseg.detect_scripts(im, res, valid_scripts=allowed_scripts) - for line in res['lines']: - Line.objects.create( - document_part=self, block=block, - box=(line[0]+block.box[0], line[1]+block.box[1], - line[2]+block.box[0], line[3]+block.box[1])) - else: - res = blla.segment(im, **options) - for line in res['lines']: - newline = Line.objects.create( - document_part=self, - baseline=line['baseline'], - mask=line['boundary'] if line['boundary'] is not None else None) + # blocks = self.blocks.all() + # if blocks: + # for block in blocks: + # if block.box[2] < block.box[0] + 10 or block.box[3] < block.box[1] + 10: + # continue + # ic = im.crop(block.box) + # res = blla.segment(ic, **options) + # # if script_detect: + # # res = pageseg.detect_scripts(im, res, valid_scripts=allowed_scripts) + # for line in res['lines']: + # Line.objects.create( + # document_part=self, block=block, + # box=(line[0]+block.box[0], line[1]+block.box[1], + # line[2]+block.box[0], line[3]+block.box[1])) + # else: + res = blla.segment(im, **options) + for line in res['lines']: + mask = line['boundary'] if line['boundary'] is not None else None + newline = Line.objects.create( + document_part=self, + baseline=line['baseline'], + mask=mask) self.workflow_state = self.WORKFLOW_STATE_SEGMENTED self.save() diff --git a/app/requirements.txt b/app/requirements.txt index b03ca76b0473707651a295a20c6ba25e987c69dd..0e52d8128079bbd3219eca71f81f457859df4f9b 100644 --- a/app/requirements.txt +++ b/app/requirements.txt @@ -20,3 +20,4 @@ drf-nested-routers==0.91 bleach==3.1.0 beautifulsoup4==4.7.1 requests==2.21.0 +numpy>=1.17