Top Prev Next Up Down TEST

ExRo

Building a MapPool from UnicodeData.txt and using the results in a match with \p{} notation.

001| with Ada.Text_IO; use Ada.Text_IO;
002| with Ada.Command_Line;
003| with Ada.Containers.Ordered_Sets;
004| with Y2018.Text.Jets.RangeVectorPack;
005| with Y2018.Text.Core; use Y2018.Text.Core;
006| with Y2018.Text.Core.Str; use Y2018.Text.Core.Str;
007| with Y2018.Text.Core.CVarPack; use Y2018.Text.Core.CVarPack;
008| with Y2018.Text.Util.UrvPack;
009| with Y2018.Text.Jets; use Y2018.Text.Jets;
010| with Y2018.Text.Jets.MatchPack;
011| with Y2018.Text.Jets.PatternPack;
012| --with DSECT;
013| procedure ExRo is -- Example: build named code-point sets from UnicodeData.txt and use them via \p{...} in a pattern
014| __mp:Y2018.Text.Util.UrvPack.MapPool; -- map that collects the named sets (DIG, LAT, GRE) stored below
015| __uval:Y2018.Text.Jets.RangeVectorPack.Set_TY.Set; -- working set returned by UnicodeData / Compress
016| __uval2:Y2018.Text.Jets.RangeVectorPack.Set_TY.Set; -- second working set, used for the COPTIC lookup
017| __unicodeDataFile:constant String:="UCDlink/UnicodeData.txt"; -- Change path to your UCD-directory
018| __p:PatternPack.Pattern_AC:=new PatternPack.Pattern; -- pattern object handed to compileM and matches
019| __m:MatchPack.Match_TY; -- match result filled in by matches
020| __nextPos : Integer:=0; -- passed to matches; NOTE(review): presumably receives the position after the match, confirm
021| __source_value:CFix:="ulf9_bas_"c; -- text the pattern is matched against
022| begin
023| __Y2018.Text.Util.UrvPack.insert(mp,null_CVar & "DIG"c); -- add the name DIG to the map (presumably creates the entry that put_Set fills below)
024| __uval:=Y2018.Text.Util.UrvPack.UnicodeData(unicodeDataFile,Character_name=>"^DIGIT"c,ShowLineCount=>1000); -- read the file; presumably selects entries whose character name matches ^DIGIT; prints a '.' per 1000 lines read
025| __uval:=Y2018.Text.Util.UrvPack.Compress(uval); -- NOTE(review): presumably merges adjacent code points into ranges, confirm against UrvPack
026| __Y2018.Text.Util.UrvPack.list_Set(uval,"my DIGIT"c,"exro_DIGIT.lst"); -- write the set to exro_DIGIT.lst under the title "my DIGIT"
027| __Y2018.Text.Util.UrvPack.put_Set(mp,null_CVar & "DIG"c,uval); -- store the set in mp under the name DIG
028| __Ada.Text_IO.Put_Line ("DIGIT"); -- progress marker on standard output
029| __--******************************
030| __Y2018.Text.Util.UrvPack.insert(mp,null_CVar & "LAT"c); -- same sequence as above, for the name LAT
031| __uval:=Y2018.Text.Util.UrvPack.UnicodeData(unicodeDataFile,Character_name=>"^LATIN"c,ShowLineCount=>1000);
032| __uval:=Y2018.Text.Util.UrvPack.Compress(uval);
033| __Y2018.Text.Util.UrvPack.list_Set(uval,"my LATIN"c,"exro_LATIN.lst");
034| __Y2018.Text.Util.UrvPack.put_Set(mp,null_CVar & "LAT"c,uval);
035| __Ada.Text_IO.Put_Line ("LATIN");
036| __--******************************
037| __Y2018.Text.Util.UrvPack.insert(mp,null_CVar & "GRE"c); -- same sequence again, for the name GRE
038| __uval:=Y2018.Text.Util.UrvPack.UnicodeData(unicodeDataFile,Character_name=>"^GREEK"c,ShowLineCount=>1000);
039| __uval:=Y2018.Text.Util.UrvPack.Compress(uval);
040| __Y2018.Text.Util.UrvPack.list_Set(uval,"my GREEK"c,"exro_GREEK.lst");
041| __Y2018.Text.Util.UrvPack.put_Set(mp,null_CVar & "GRE"c,uval);
042| __uval2:=Y2018.Text.Util.UrvPack.UnicodeData(unicodeDataFile,Character_name=>"^COPTIC"c,ShowLineCount=>1000); -- COPTIC goes into the separate set uval2
043| __uval2:=Y2018.Text.Util.UrvPack.Compress(uval2);
044| __Y2018.Text.Util.UrvPack.list_Set(uval2,"my COPTIC"c,"exro_COPTIC.lst");
045| __Y2018.Text.Util.UrvPack.put_Set(mp,null_CVar & "GRE"c,uval2); -- NOTE(review): second put_Set under GRE; the map listing reports GRE size= 4 (the COPTIC range count), so this appears to replace the GREEK set rather than merge with it, confirm intent
046| __Ada.Text_IO.Put_Line ("GREEK&COPTIC");
047| __
048| __Y2018.Text.Util.UrvPack.list_Map(mp,topTitle=>"-- Top of Map --",BottomTitle=>"-- Bottom of Map --");
049|
050| __PatternPack.compileM(p,"^.*?(\p{DIG}.*?)(\p{LAT}+).*"c,Y2018.Text.Util.UrvPack.copy_Map(mp)); -- compile with a copy of the map so that \p{DIG} and \p{LAT} can refer to the sets stored above
051| __if patternPack.matches(p,1,nextPos,source_value,m) then -- NOTE(review): the arguments 1 and nextPos are presumably start position and resulting position, confirm
052| _____declare
053| ________r:I_A_ARRAY:=MatchPack.getMatch(m); -- match ranges; per the program output r(0) is the whole match and r(1), r(2) are the two groups
054| _____begin
055| ________Ada.Text_IO.Put_Line ("zero " & subIA(source_value,r(0)));
056| ________Ada.Text_IO.Put_Line ("one. " & subIA(source_value,r(1)));
057| ________Ada.Text_IO.Put_Line ("two. " & subIA(source_value,r(2)));
058| _____end;
059| __end if;
060|
061| __Ada.Text_IO.Put_Line ("*** End of ExRo ***");
062| end ExRo;

Result

Running export LD_LIBRARY_PATH=lib;bin/exro in TEST directory.
The file UnicodeData.txt is read through four times; four list files are produced, and the result is written to standard output.
exro_DIGIT.lst:


001| my DIGIT
002| ________30 .. ______39
003| ______2488 .. ____2490
004| _____1F100 .. ___1F10A

exro_LATIN.lst


001| my LATIN
002| ________41 .. ______5A
003| ________61 .. ______7A
004| ________C0 .. ______D6
005| ________D8 .. ______F6
006| ________F8 .. _____2AF
007| ______1D00 .. ____1D25
008| ______1D62 .. ____1D65
009| ______1D6B .. ____1D77
010| ______1D79 .. ____1D9A
011| ______1E00 .. ____1EFF
012| ______2090 .. ____209C
013| ______2184 .. ____2184
014| ______271D .. ____271D
015| ______2C60 .. ____2C7C
016| ______2C7E .. ____2C7F
017| ______A722 .. ____A76F
018| ______A771 .. ____A787
019| ______A78B .. ____A7BF
020| ______A7C2 .. ____A7CA
021| ______A7F5 .. ____A7F7
022| ______A7FA .. ____A7FF
023| ______AB30 .. ____AB5A
024| ______AB60 .. ____AB64
025| ______AB66 .. ____AB68
026| ______FB00 .. ____FB06

exro_GREEK.lst


001| my GREEK
002| _______370 .. _____377
003| _______37A .. _____37F
004| _______384 .. _____38A
005| _______38C .. _____38C
006| _______38E .. _____3A1
007| _______3A3 .. _____3E1
008| _______3F0 .. _____3FF
009| ______1D26 .. ____1D2A
010| ______1D66 .. ____1D6A
011| ______1F00 .. ____1F15
012| ______1F18 .. ____1F1D
013| ______1F20 .. ____1F45
014| ______1F48 .. ____1F4D
015| ______1F50 .. ____1F57
016| ______1F59 .. ____1F59
017| ______1F5B .. ____1F5B
018| ______1F5D .. ____1F5D
019| ______1F5F .. ____1F7D
020| ______1F80 .. ____1FB4
021| ______1FB6 .. ____1FC4
022| ______1FC6 .. ____1FD3
023| ______1FD6 .. ____1FDB
024| ______1FDD .. ____1FEF
025| ______1FF2 .. ____1FF4
026| ______1FF6 .. ____1FFE
027| ______AB65 .. ____AB65
028| _____10140 .. ___1018D
029| _____101A0 .. ___101A0
030| _____1D200 .. ___1D241
031| _____1D245 .. ___1D245

exro_COPTIC.lst


001| my COPTIC
002| _______3E2 .. _____3EF
003| ______2C80 .. ____2CF3
004| ______2CF9 .. ____2CFF
005| _____102E0 .. ___102FB

Listing on STDOUT:

001| >
002| .................................<
003| DIGIT
004| >
005| .................................<
006| LATIN
007| >
008| .................................<
009| >
010| .................................<
011| GREEK&COPTIC
012| -- Top of Map --
013| __DIG size= 3
014| __GRE size= 4
015| __LAT size= 25
016| __Number of urvSets 3
017| -- Bottom of Map --
018| zero ulf9_bas_
019| one. 9_
020| two. bas
021| *** End of ExRo ***

Specifying "ShowLineCount=>1000" results in a 'line counter' being shown for those of us who are impatient: a point ('.') is printed after every 1000 lines read.

The pattern ^.*?(\p{DIG}.*?)(\p{LAT}+).* is applied to the value "ulf9_bas_"c, and the result is shown on lines 018 .. 020 of the STDOUT listing.
An output file q____.lst is also produced.