class Myclass:
    """Helpers that clean text and count tokens over a dataframe column.

    Each ``*_column`` method applies a per-value function to a column,
    using pandarallel's ``parallel_apply`` when ``self.parallel`` is truthy
    and the plain ``apply`` otherwise.

    ``clean_text`` calls ``self.remove_tags``, which is not defined in this
    class as shown here and must be provided elsewhere.
    """

    # Default for the switch read by the ``*_column`` methods; the original
    # snippet reads ``self.parallel`` without ever defining it. Instances
    # may override this attribute.
    parallel = False

    # Method 1
    @staticmethod
    def remove_newlines(txt):
        """Collapse every run of consecutive newlines in *txt* into one."""
        txt = re.sub(r'[\n]+', '\n', txt)
        return txt

    def clean_text(self, txt):
        """Return *txt* after ``remove_tags`` followed by ``remove_newlines``."""
        txt = self.remove_tags(txt)
        txt = self.remove_newlines(txt)
        return txt

    def clean_text_column(self, df_col):
        """Apply ``clean_text`` to every value of *df_col* and return the result."""
        if self.parallel:
            pandarallel.initialize(progress_bar=True)
            df_col = df_col.parallel_apply(self.clean_text)
        else:
            df_col = df_col.apply(self.clean_text)
        return df_col

    # Method 2
    @staticmethod
    def get_tokenizer(model='cl100k_base'):
        """Return the tiktoken encoding registered under the name *model*."""
        return tiktoken.get_encoding(model)

    @staticmethod
    def get_tokens(text, tokenizer):
        """Return the number of tokens *tokenizer* produces for *text*.

        Despite the name, this returns a count (``len`` of the encoded
        sequence), not the tokens themselves. ``disallowed_special=()`` is
        passed through to ``tokenizer.encode``.
        """
        tokens = tokenizer.encode(
            text,
            disallowed_special=()
        )
        return len(tokens)

    def get_tokens_column(self, df_col):
        """Apply ``get_tokens`` to every value of *df_col* and return the result.

        One tokenizer is created per call and handed to each invocation
        through ``args``.
        """
        tokenizer = self.get_tokenizer()
        if self.parallel:
            # NOTE(review): there is an issue with pandarallel here. This
            # parallel_apply call is reported to get stuck at 0% without
            # raising any error; the cause is not established from this code.
            pandarallel.initialize(progress_bar=True)
            df_col = df_col.parallel_apply(self.get_tokens, args=(tokenizer,))
        else:
            df_col = df_col.apply(self.get_tokens, args=(tokenizer,))
        return df_col
General
Acknowledgement
The issue does not occur with `pandas` alone (without `pandarallel`); this was checked before writing a new bug report.
Bug description
Observed behavior
I have two functions that I'm running with `parallel_apply` on my dataframe. Here are the functions:

The first method runs OK with `parallel_apply`, but the second method gets stuck at 0% without raising any error.