Add tests, commandline and docs for include_files and exclude_files

gaogaotiantian · Aug 12, 2020 · 01de79a · 01de79a
1 parent bfd3790
commit 01de79a
Show file tree

Hide file tree

Showing 4 changed files with 116 additions and 9 deletions.
diff --git a/README.md b/README.md
@@ -130,6 +130,73 @@ However, you can generate json file as well, which complies to the chrome trace
 
 At the moment, perfetto did not support locally stand alone HTML file generation, so I'm not able to switch completely to it. The good news is that once you load the perfetto page, you can use it even when you are offline. 
 
+
+### Trace Filter
+
+Sometimes your code is really complicated or you need to run your program for a long time, which means the parsing time would be too long and the HTML/JSON file would be too large. There are ways in viztracer to filter out the data you don't need. 
+
+The filter mechanism only works in the C tracer, and it works at tracing time, not parsing time. That means using filters will introduce some extra overhead while tracing, but will save significant memory, parsing time and disk space. 
+
+Currently we support two kinds of filters:
+
+#### max_stack_depth
+
+```max_stack_depth``` is a straightforward way to filter your data. It limits the stack depth viztracer will trace, which cuts out deep call stacks, including some nasty recursive calls. 
+
+You can specify ```max_stack_depth``` in command line:
+
+```
+python3 -m viztracer --max_stack_depth 10 my_script.py
+```
+
+Or you can pass it as an argument to the ```VizTracer``` object:
+
+```python
+from viztracer import VizTracer
+
+tracer = VizTracer(max_stack_depth=10)
+```
+
+
+#### include_files and exclude_files
+
+There are cases when you are only interested in functions in certain files. You can use the ```include_files``` and ```exclude_files``` options to filter out data you are not interested in. 
+
+When you are using ```include_files```, only the files and directories you specify are recorded. Similarly, when you are using ```exclude_files```, files and directories you specify will not be recorded. 
+
+**IMPORTANT: ```include_files``` and ```exclude_files``` can't both be specified. You can only use one of them.**
+
+**If a function is not recorded based on ```include_files``` or ```exclude_files``` rules, none of its descendant functions will be recorded, even if they match the rules.**
+
+You can specify ```include_files``` and ```exclude_files``` in command line, but they can take more than one argument, which will make the following command ambiguous:
+
+```
+# Ambiguous command which should NOT be used
+python3 -m viztracer --include_files ./src my_script.py
+```
+
+Instead, when you are using ```--include_files``` or ```--exclude_files```, ```--run``` should be passed for the command that you actually want to execute:
+
+```
+# --run is used to solve ambiguity
+python3 -m viztracer --include_files ./src --run my_script.py
+```
+
+However, if another option comes between the file list and your script, it ends the file list and resolves the ambiguity as well:
+
+```
+# This will work too
+python3 -m viztracer --include_files ./src --max_stack_depth 5 my_script.py
+```
+
+You can also pass a ```list``` as an argument to ```VizTracer```:
+
+```python
+from viztracer import VizTracer
+
+tracer = VizTracer(include_files=["./src", "./test/test1.py"])
+```
+
 ### Choose Tracer
 
 The default tracer for current version is c tracer, which introduce a relatively small overhead(worst case 2-3x) but only works for CPython on Linux. However, if there's other reason that you would prefer a pure-python tracer, you can use python tracer using ```tracer``` argument when you initialize ```VizTracer``` object.
@@ -138,6 +205,8 @@ The default tracer for current version is c tracer, which introduce a relatively
 tracer = VizTracer(tracer="python")
 ```
 
+**The python tracer will be deprecated in the future because of its performance issues.**
+
 #### Cleanup of c Tracer
 
 The interface for c trace is almost exactly the same as python tracer, except for the fact that c tracer does not support command line run now. However, to achieve lower overhead, some optimization is applied to c tracer so it will withhold the memory it allocates for future use to reduce the time it calls ```malloc()```. If you want the c trace to free all the memory it allocates while collecting trace, use

diff --git a/src/viztracer/__main__.py b/src/viztracer/__main__.py
@@ -11,23 +11,37 @@
     parser.add_argument("--output_file", "-o", nargs="?", default="result.html")
     parser.add_argument("--quiet", action="store_true", default=False)
     parser.add_argument("--max_stack_depth", nargs="?", type=int, default=-1)
+    parser.add_argument("--exclude_files", nargs="*", default=None)
+    parser.add_argument("--include_files", nargs="*", default=None)
+    parser.add_argument("--run", nargs="*", default=[])
     parser.add_argument("command", nargs=argparse.REMAINDER)
     options = parser.parse_args(sys.argv[1:])
+
+    if options.command:
+        command = options.command
+    elif options.run:
+        command = options.run
+    else:
+        parser.print_help()
+        exit(0)
+
     try:
-        f = options.command[0]
+        f = command[0]
         code_string = open(f).read()
     except FileNotFoundError:
         print("No such file as {}".format(f))
         exit(1)
-    sys.argv = options.command[1:]
+    sys.argv = command[1:]
     if options.quiet:
         verbose = 0
     else:
         verbose = 1
     tracer = VizTracer(
         tracer=options.tracer, 
         verbose=verbose,
-        max_stack_depth=options.max_stack_depth
+        max_stack_depth=options.max_stack_depth,
+        exclude_files=options.exclude_files,
+        include_files=options.include_files
     )
     tracer.start()
     exec(code_string)

diff --git a/src/viztracer/tracer.py b/src/viztracer/tracer.py
@@ -35,7 +35,10 @@ def include_files(self, include_files):
         if include_files == None:
             self.__include_files = None
         elif type(include_files) == list:
-            self.__include_files = [os.path.abspath(f) for f in include_files]
+            if include_files:
+                self.__include_files = [os.path.abspath(f) for f in include_files]
+            else:
+                self.__include_files = None
         else:
             raise Exception("include_files has to be a list")
 
@@ -48,7 +51,10 @@ def exclude_files(self, exclude_files):
         if exclude_files == None:
             self.__exclude_files = None
         elif type(exclude_files) == list:
-            self.__exclude_files = [os.path.abspath(f) for f in exclude_files]
+            if exclude_files:
+                self.__exclude_files = [os.path.abspath(f) for f in exclude_files]
+            else:
+                self.__exclude_files = None
         else:
             raise Exception("exclude_files has to be a list")
 
@@ -63,8 +69,8 @@ def start(self):
                 raise Exception("include_files and exclude_files can't be both specified!")
             snaptrace.config(
                 max_stack_depth=self.max_stack_depth,
-                include_files = self.include_files,
-                exclude_files = self.exclude_files
+                include_files=self.include_files,
+                exclude_files=self.exclude_files
             )
             snaptrace.start()
 

diff --git a/tests/test_cmdline.py b/tests/test_cmdline.py
@@ -17,16 +17,22 @@ def build_script(self):
 
     def cleanup(self, output_file="result.html"):
         os.remove("cmdline_test.py")
-        os.remove(output_file)
+        if output_file:
+            os.remove(output_file)
 
     def template(self, cmd_list, expected_output_file="result.html", success=True):
         self.build_script()
         result = subprocess.run(cmd_list, stdout=subprocess.PIPE)
         self.assertTrue(success ^ (result.returncode != 0))
-        self.assertTrue(os.path.exists(expected_output_file))
+        if expected_output_file:
+            self.assertTrue(os.path.exists(expected_output_file))
         self.cleanup(output_file=expected_output_file)
         return result
 
+    def test_no_file(self):
+        result = self.template(["python", "-m", "viztracer"], expected_output_file=None)
+        self.assertIn("help", result.stdout.decode("utf8"))
+
     def test_run(self):
         self.template(["python", "-m", "viztracer", "cmdline_test.py"])
 
@@ -48,4 +54,16 @@ def test_verbose(self):
 
     def test_max_stack_depth(self):
         self.template(["python", "-m", "viztracer", "--max_stack_depth", "5", "cmdline_test.py"])
+
+    def test_include_files(self):
+        result = self.template(["python", "-m", "viztracer", "--include_files", "./abcd", "cmdline_test.py"], expected_output_file=None)
+        self.assertIn("help", result.stdout.decode("utf8"))
+        self.template(["python", "-m", "viztracer", "--include_files", "./", "--run", "cmdline_test.py"])
+        self.template(["python", "-m", "viztracer", "--include_files", "./", "--max_stack_depth", "5", "cmdline_test.py"])
+        self.template(["python", "-m", "viztracer", "--include_files", "./abcd", "--run", "cmdline_test.py"])
 
+    def test_exclude_files(self):
+        result = self.template(["python", "-m", "viztracer", "--exclude_files", "./abcd", "cmdline_test.py"], expected_output_file=None)
+        self.assertIn("help", result.stdout.decode("utf8"))
+        self.template(["python", "-m", "viztracer", "--exclude_files", "./", "--run", "cmdline_test.py"])
+        self.template(["python", "-m", "viztracer", "--exclude_files", "./abcd", "--run", "cmdline_test.py"])