Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
.cache
.idea
*.egg-info/
*.pyc
.cache
dist/
135 changes: 3 additions & 132 deletions bin/udapy
Original file line number Diff line number Diff line change
@@ -1,136 +1,7 @@
#!/usr/bin/env python3
import os
import gc
"""Thin wrapper for backward compatibility. Calls udapi.cli.main()."""
import sys
import atexit
import logging
import argparse
from udapi.cli import main

from udapi.core.run import Run

# Parse command line arguments.
argparser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter,
usage="udapy [optional_arguments] scenario",
epilog="See http://udapi.github.io",
description="udapy - Python interface to Udapi - API for Universal Dependencies\n\n"
"Examples of usage:\n"
" udapy -s read.Sentences udpipe.En < in.txt > out.conllu\n"
" udapy -T < sample.conllu | less -R\n"
" udapy -HAM ud.MarkBugs < sample.conllu > bugs.html\n")
argparser.add_argument(
"-q", "--quiet", action="store_true",
help="Warning, info and debug messages are suppressed. Only fatal errors are reported.")
argparser.add_argument(
"-v", "--verbose", action="store_true",
help="Warning, info and debug messages are printed to the STDERR.")
argparser.add_argument(
"-s", "--save", action="store_true",
help="Add write.Conllu to the end of the scenario")
argparser.add_argument(
"-T", "--save_text_mode_trees", action="store_true",
help="Add write.TextModeTrees color=1 to the end of the scenario")
argparser.add_argument(
"-H", "--save_html", action="store_true",
help="Add write.TextModeTreesHtml color=1 to the end of the scenario")
argparser.add_argument(
"-A", "--save_all_attributes", action="store_true",
help="Add attributes=form,lemma,upos,xpos,feats,deprel,misc (to be used after -T and -H)")
argparser.add_argument(
"-C", "--save_comments", action="store_true",
help="Add print_comments=1 (to be used after -T and -H)")
argparser.add_argument(
"-M", "--marked_only", action="store_true",
help="Add marked_only=1 to the end of the scenario (to be used after -T and -H)")
argparser.add_argument(
"-N", "--no_color", action="store_true",
help="Add color=0 to the end of the scenario, this overrides color=1 of -T and -H")
argparser.add_argument(
"-X", "--extra", action="append",
help="Add a specified parameter (or a block name) to the end of the scenario\n"
"For example 'udapy -TNX attributes=form,misc -X layout=align < my.conllu'")
argparser.add_argument(
"--gc", action="store_true",
help="By default, udapy disables Python garbage collection and at-exit cleanup\n"
"to speed up everything (especially reading CoNLL-U files). In edge cases,\n"
"when processing many files and running out of memory, you can disable this\n"
"optimization (i.e. enable garbage collection) with 'udapy --gc'.")
argparser.add_argument(
'scenario', nargs=argparse.REMAINDER, help="A sequence of blocks and their parameters.")

args = argparser.parse_args()

# Set the level of logs according to parameters.
if args.verbose:
level = logging.DEBUG
elif args.quiet:
level = logging.CRITICAL
else:
level = logging.INFO

logging.basicConfig(format='%(asctime)-15s [%(levelname)7s] %(funcName)s - %(message)s',
level=level)

# Global flag to track if an unhandled exception occurred
_unhandled_exception_occurred = False

def _custom_excepthook(exc_type, exc_value, traceback):
global _unhandled_exception_occurred
_unhandled_exception_occurred = True

# Call the default excepthook to allow normal error reporting
sys.__excepthook__(exc_type, exc_value, traceback)

# Override the default excepthook
sys.excepthook = _custom_excepthook


# Process and provide the scenario.
if __name__ == "__main__":

# Disabling garbage collections makes the whole processing much faster.
# Similarly, we can save several seconds by partially disabling the at-exit Python cleanup
# (atexit hooks are called in reversed order of their registration,
# so flushing stdio buffers etc. will be still done before the os._exit(0) call).
# See https://instagram-engineering.com/dismissing-python-garbage-collection-at-instagram-4dca40b29172
# Is it safe to disable GC?
# OS will free the memory allocated by this process after it ends anyway.
# The udapy wrapper is aimed for one-time tasks, not a long-running server,
# so in a typical case a document is loaded and almost no memory is freed before the end.
# Udapi documents have a many cyclic references, so running GC is quite slow.
if not args.gc:
gc.disable()
# When an exception/error has happened, udapy should exit with a non-zero exit code,
# so that users can use `udapy ... || echo "Error detected"` (or Makefile reports errors).
# However, we cannot use `atexit.register(lambda: os._exit(1 if sys.exc_info()[0] else 0))`
# because the Python has already exited the exception-handling block
# (the exception/error has been already reported and sys.exc_info()[0] is None).
# We thus keep record whether _unhandled_exception_occurred.
atexit.register(lambda: os._exit(1 if _unhandled_exception_occurred else 0))
atexit.register(sys.stderr.flush)
if args.save:
args.scenario = args.scenario + ['write.Conllu']
if args.save_text_mode_trees:
args.scenario = args.scenario + ['write.TextModeTrees', 'color=1']
if args.save_html:
args.scenario = args.scenario + ['write.TextModeTreesHtml', 'color=1']
if args.save_all_attributes:
args.scenario = args.scenario + ['attributes=form,lemma,upos,xpos,feats,deprel,misc']
if args.save_comments:
args.scenario = args.scenario + ['print_comments=1']
if args.marked_only:
args.scenario = args.scenario + ['marked_only=1']
if args.no_color:
args.scenario = args.scenario + ['color=0']
if args.extra:
args.scenario += args.extra

runner = Run(args)
# udapy is often piped to head etc., e.g.
# `seq 1000 | udapy -s read.Sentences | head`
# Let's prevent Python from reporting (with distracting stacktrace)
# "BrokenPipeError: [Errno 32] Broken pipe"
try:
runner.execute()
except BrokenPipeError:
pass
sys.exit(main())
8 changes: 4 additions & 4 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,14 @@ classifiers =
packages = find:
python_requires = >=3.9
include_package_data = True
scripts =
bin/udapy
install_requires =
colorama
termcolor

[options.entry_points]
console_scripts =
udapy = udapi:cli.main

[options.extras_require]
test =
pytest


140 changes: 140 additions & 0 deletions udapi/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#!/usr/bin/env python3
import os
import gc
import sys
import atexit
import logging
import argparse

from udapi.core.run import Run

# Parse command line arguments.
argparser = argparse.ArgumentParser(
formatter_class=argparse.RawTextHelpFormatter,
usage="udapy [optional_arguments] scenario",
epilog="See http://udapi.github.io",
description="udapy - Python interface to Udapi - API for Universal Dependencies\n\n"
"Examples of usage:\n"
" udapy -s read.Sentences udpipe.En < in.txt > out.conllu\n"
" udapy -T < sample.conllu | less -R\n"
" udapy -HAM ud.MarkBugs < sample.conllu > bugs.html\n")
argparser.add_argument(
"-q", "--quiet", action="store_true",
help="Warning, info and debug messages are suppressed. Only fatal errors are reported.")
argparser.add_argument(
"-v", "--verbose", action="store_true",
help="Warning, info and debug messages are printed to the STDERR.")
argparser.add_argument(
"-s", "--save", action="store_true",
help="Add write.Conllu to the end of the scenario")
argparser.add_argument(
"-T", "--save_text_mode_trees", action="store_true",
help="Add write.TextModeTrees color=1 to the end of the scenario")
argparser.add_argument(
"-H", "--save_html", action="store_true",
help="Add write.TextModeTreesHtml color=1 to the end of the scenario")
argparser.add_argument(
"-A", "--save_all_attributes", action="store_true",
help="Add attributes=form,lemma,upos,xpos,feats,deprel,misc (to be used after -T and -H)")
argparser.add_argument(
"-C", "--save_comments", action="store_true",
help="Add print_comments=1 (to be used after -T and -H)")
argparser.add_argument(
"-M", "--marked_only", action="store_true",
help="Add marked_only=1 to the end of the scenario (to be used after -T and -H)")
argparser.add_argument(
"-N", "--no_color", action="store_true",
help="Add color=0 to the end of the scenario, this overrides color=1 of -T and -H")
argparser.add_argument(
"-X", "--extra", action="append",
help="Add a specified parameter (or a block name) to the end of the scenario\n"
"For example 'udapy -TNX attributes=form,misc -X layout=align < my.conllu'")
argparser.add_argument(
"--gc", action="store_true",
help="By default, udapy disables Python garbage collection and at-exit cleanup\n"
"to speed up everything (especially reading CoNLL-U files). In edge cases,\n"
"when processing many files and running out of memory, you can disable this\n"
"optimization (i.e. enable garbage collection) with 'udapy --gc'.")
argparser.add_argument(
'scenario', nargs=argparse.REMAINDER, help="A sequence of blocks and their parameters.")


# Process and provide the scenario.
def main(argv=None):
args = argparser.parse_args(argv)

# Set the level of logs according to parameters.
if args.verbose:
level = logging.DEBUG
elif args.quiet:
level = logging.CRITICAL
else:
level = logging.INFO

logging.basicConfig(format='%(asctime)-15s [%(levelname)7s] %(funcName)s - %(message)s',
level=level)

# Global flag to track if an unhandled exception occurred
_unhandled_exception_occurred = False

def _custom_excepthook(exc_type, exc_value, traceback):
global _unhandled_exception_occurred
_unhandled_exception_occurred = True

# Call the default excepthook to allow normal error reporting
sys.__excepthook__(exc_type, exc_value, traceback)

# Override the default excepthook
sys.excepthook = _custom_excepthook

# Disabling garbage collections makes the whole processing much faster.
# Similarly, we can save several seconds by partially disabling the at-exit Python cleanup
# (atexit hooks are called in reversed order of their registration,
# so flushing stdio buffers etc. will be still done before the os._exit(0) call).
# See https://instagram-engineering.com/dismissing-python-garbage-collection-at-instagram-4dca40b29172
# Is it safe to disable GC?
# OS will free the memory allocated by this process after it ends anyway.
# The udapy wrapper is aimed for one-time tasks, not a long-running server,
# so in a typical case a document is loaded and almost no memory is freed before the end.
# Udapi documents have a many cyclic references, so running GC is quite slow.
if not args.gc:
gc.disable()
# When an exception/error has happened, udapy should exit with a non-zero exit code,
# so that users can use `udapy ... || echo "Error detected"` (or Makefile reports errors).
# However, we cannot use `atexit.register(lambda: os._exit(1 if sys.exc_info()[0] else 0))`
# because the Python has already exited the exception-handling block
# (the exception/error has been already reported and sys.exc_info()[0] is None).
# We thus keep record whether _unhandled_exception_occurred.
atexit.register(lambda: os._exit(1 if _unhandled_exception_occurred else 0))
atexit.register(sys.stderr.flush)
if args.save:
args.scenario = args.scenario + ['write.Conllu']
if args.save_text_mode_trees:
args.scenario = args.scenario + ['write.TextModeTrees', 'color=1']
if args.save_html:
args.scenario = args.scenario + ['write.TextModeTreesHtml', 'color=1']
if args.save_all_attributes:
args.scenario = args.scenario + ['attributes=form,lemma,upos,xpos,feats,deprel,misc']
if args.save_comments:
args.scenario = args.scenario + ['print_comments=1']
if args.marked_only:
args.scenario = args.scenario + ['marked_only=1']
if args.no_color:
args.scenario = args.scenario + ['color=0']
if args.extra:
args.scenario += args.extra

runner = Run(args)
# udapy is often piped to head etc., e.g.
# `seq 1000 | udapy -s read.Sentences | head`
# Let's prevent Python from reporting (with distracting stacktrace)
# "BrokenPipeError: [Errno 32] Broken pipe"
try:
runner.execute()
except BrokenPipeError:
pass
return 0


if __name__ == "__main__":
sys.exit(main())