diff --git a/logs_analyzer/chapter1/python/databricks/apps/logs/apache_access_log.py b/logs_analyzer/chapter1/python/databricks/apps/logs/apache_access_log.py
index c2b849a..d83e68c 100644
--- a/logs_analyzer/chapter1/python/databricks/apps/logs/apache_access_log.py
+++ b/logs_analyzer/chapter1/python/databricks/apps/logs/apache_access_log.py
@@ -2,14 +2,20 @@
 
 from pyspark.sql import Row
 
+# The last item, content size, could be 0, represented either as '0' or '-'
+# For the log from  http://www.monitorware.com/en/logsamples/apache.php
+# Still there is one line that does not conform. 
+# To save time I just delete that line of log for this tutorial.
 
-APACHE_ACCESS_LOG_PATTERN = '^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+) (\S+)" (\d{3}) (\d+)'
+APACHE_ACCESS_LOG_PATTERN = '^(\S+) (\S+) (\S+) \[([\w:/]+\s[+\-]\d{4})\] "(\S+) (\S+) (\S+)" (\d{3}) (\d+|-)'
 
 # Returns a dictionary containing the parts of the Apache Access Log.
 def parse_apache_log_line(logline):
     match = re.search(APACHE_ACCESS_LOG_PATTERN, logline)
-    if match is None:
+   if match is None:
         raise Error("Invalid logline: %s" % logline)
+    else:
+    	val = long(match.group(9)) if match.group(9)!='-' else 0
     return Row(
         ip_address    = match.group(1),
         client_identd = match.group(2),
@@ -19,5 +25,5 @@ def parse_apache_log_line(logline):
         endpoint      = match.group(6),
         protocol      = match.group(7),
         response_code = int(match.group(8)),
-        content_size  = long(match.group(9))
+        content_size  = val
     )