Commit 52e89f4a authored by Lucas Terriel 🐍

ADD : script perl to convert xml2ead + new pipeline notebook XML2text + new rawtext dataset

parent 4461eaf6
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -312,7 +312,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
"version": "3.7.9"
}
},
"nbformat": 4,
#!/usr/bin/perl
use 5.010;
use strict;
use warnings;
use XML::LibXML;
###############################
# SCRIPT TO CONVERT XML TO TEXT
###############################
# Reads one XML file, selects every <unittitle>, <unitdate> and
# <scopecontent> element, and writes the text content of each selected
# node to a plain-text file, in the order findnodes() returns them and
# without any separator between nodes.

# Initialize file
my $filename = './dataset/FRAN_IR_000061.xml';
# Initialize destination
my $des = 'test_p.txt';
# Initialize xpath
my $xpath = '//unittitle | //unitdate | //scopecontent';

# load_xml() dies on its own if the file is missing or not well-formed.
my $dom = XML::LibXML->load_xml(location => $filename);

# XML::LibXML hands back decoded character strings, so the output handle
# needs an explicit encoding layer; without it non-ASCII text is written
# as Latin-1 or triggers "Wide character in print" warnings.
open(my $out_fh, '>:encoding(UTF-8)', $des)
    or die "Couldn't open '$des' for writing: $!";

foreach my $title ($dom->findnodes($xpath)) {
    print {$out_fh} $title->to_literal()
        or die "Couldn't write to '$des': $!";
}

# Buffered write errors only surface at close, so a failure here is fatal.
close($out_fh) or die "Couldn't close '$des': $!";
# TODO : - resolve tabulation (whitespace coming from the XML is written as-is)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment