-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Translate and adjust count example to fix #589
I can't adjust the metrics example yet: I do not know what the outcome of the old Morph is, since it uses square.
- Loading branch information
Showing
12 changed files
with
200 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
6 changes: 6 additions & 0 deletions
6
metafacture-runner/src/main/dist/examples/count/gnd/gnd-type.fix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# Reduce the matched field to its first two characters and keep only that field; records that do not match take the else branch and call reject(). | ||
# NOTE(review): "[email protected]" looks like an e-mail-obfuscation artifact of the page this diff was captured from; presumably a PICA path such as "002@.0" - confirm against the repository. | ||
if any_match("[email protected]","...*") | ||
replace_all("[email protected]","^(..).*","$1") # only keep the first two letters | ||
retain("[email protected]") # only keep the relevant element | ||
else | ||
reject() | ||
end |
11 changes: 11 additions & 0 deletions
11
metafacture-runner/src/main/dist/examples/count/subjects/10.pica
Large diffs are not rendered by default.
Oops, something went wrong.
8 changes: 8 additions & 0 deletions
8
metafacture-runner/src/main/dist/examples/count/subjects/references.fix
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# For every field matching "041A*", copy its subfield 9 and append it to the list "relevantField". | ||
do list(path:"041A*","var":"$i") | ||
copy_field("$i.9","relevantField.$append") | ||
end | ||
|
||
# Clean up the collected values: trim each entry, then apply uniq to the list (presumably removes duplicates - see the Fix function reference). | ||
trim("relevantField.*") | ||
uniq("relevantField") | ||
|
||
# Keep only "relevantField" in the record. | ||
retain("relevantField") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 11 additions & 0 deletions
11
metafacture-runner/src/main/dist/examples/morph/count/gnd/count-gnd-types.flux
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
// Counts the values produced by gnd-type.xml for the PICA records in the input file and prints the result to stdout. | ||
// The input defaults to gnd-sample.pica next to this script. | ||
default fileName = FLUX_DIR + "gnd-sample.pica"; | ||
|
||
fileName| | ||
open-file| | ||
as-lines| | ||
decode-pica| | ||
morph(FLUX_DIR + "gnd-type.xml")| | ||
// Convert the morph output to triples and count them, grouped by object. | ||
stream-to-triples| | ||
count-triples(countBy="object")| | ||
// One output line per counted triple: ${s} and ${o} (presumably the triple's subject and object) separated by a tab. | ||
template("${s}\t${o}")| | ||
write("stdout"); |
100 changes: 100 additions & 0 deletions
100
metafacture-runner/src/main/dist/examples/morph/count/gnd/gnd-sample.pica
Large diffs are not rendered by default.
Oops, something went wrong.
File renamed without changes.
14 changes: 14 additions & 0 deletions
14
metafacture-runner/src/main/dist/examples/morph/count/metrics/metrics-gnd-subjects.flux
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
|
||
// Reads `catalogue` line by line, decodes PICA, applies subject-cooccurrence.xml, counts the resulting triples by object, | ||
// computes the "X2" metric on the counts and writes the result to x2.dat next to this script. | ||
// NOTE(review): `catalogue` has no `default` in the lines shown - presumably it must be supplied by the caller; confirm. | ||
catalogue| | ||
open-file| | ||
as-lines| | ||
catch-object-exception| | ||
decode-pica| | ||
batch-log(batchsize="100000")| | ||
morph(FLUX_DIR + "subject-cooccurrence.xml")| | ||
stream-to-triples| | ||
count-triples(countBy="object")| | ||
calculate-metrics("X2")| | ||
template("${s} ${o}")| | ||
//write("stdout"); | ||
write(FLUX_DIR+"x2.dat"); |
27 changes: 27 additions & 0 deletions
27
metafacture-runner/src/main/dist/examples/morph/count/metrics/subject-cooccurrence.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<metamorph xmlns="http://www.culturegraph.org/metamorph" | ||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="1"> | ||
<rules> | ||
|
||
<!-- @subj: value of subfield 9 of every field matching 041A*, passed through trim and unique. --> | ||
<data source="041A*.9" name="@subj"> | ||
<trim /> | ||
<unique /> | ||
</data> | ||
|
||
<!-- Combines @subj values with the delimiter and prefixes each result with "2:" (pairing semantics per the Metamorph "square" collector; confirm). --> | ||
<!-- The ampersand must be written as an entity reference; a bare ampersand in an attribute value is not well-formed XML. --> | ||
<square delimiter="&amp;" name=""> | ||
<data source="@subj" name=""/> | ||
<postprocess> | ||
<compose prefix="2:"/> | ||
</postprocess> | ||
</square> | ||
|
||
<!-- Emits every @subj value prefixed with "1:". --> | ||
<data source="@subj" name=""> | ||
<compose prefix="1:"/> | ||
</data> | ||
|
||
<!-- Emits the constant "1:" for occurrence 1 of @subj only (presumably once per record; confirm). --> | ||
<data source="@subj" name=""> | ||
<occurrence only="1" /> | ||
<constant value="1:" /> | ||
</data> | ||
</rules> | ||
</metamorph> |
19 changes: 19 additions & 0 deletions
19
metafacture-runner/src/main/dist/examples/morph/count/subjects/references.flux
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
|
||
// NOTE(review): `counts` is declared here but not referenced in the lines shown; the pipeline below writes to "subjects.dat" instead - confirm which is intended. | ||
default counts=FLUX_DIR + "counts.dat"; | ||
default catalogue = FLUX_DIR + "10.pica"; | ||
|
||
// Count references: first print a progress message naming the input file. | ||
"counting references in " + catalogue | write("stdout"); | ||
|
||
// Read the catalogue line by line, decode PICA, apply references.xml, and count the resulting triples grouped by object. | ||
catalogue| | ||
open-file| | ||
as-lines| | ||
catch-object-exception| | ||
decode-pica| | ||
morph(FLUX_DIR + "references.xml")| | ||
stream-to-triples| | ||
count-triples(countBy="object")| | ||
|
||
write("subjects.dat"); | ||
|
||
|
File renamed without changes.