Skip to content

Commit de50ff2

Browse files
committed
Revert Bash to text default
1 parent c60e82f commit de50ff2

File tree

3 files changed: 34 additions (+34) and 71 deletions (-71)

bash/run_convert_results.sh

Lines changed: 12 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -1,33 +1,22 @@
1-
#!/bin/bash
1+
data_dir=${REPO_DIR}/datasets/beir/
2+
output_dir=${REPO_DIR}/outputs/beir/
23

3-
data_dir=${REPO_DIR}/datasets/
4-
output_dir=${REPO_DIR}/outputs/
4+
# List of datasets to process
5+
datasets=('trec-covid') # 'climate-fever' 'fever' 'hotpotqa' 'msmarco' 'nfcorpus' 'nq' 'fiqa' 'scidocs' 'scifact' 'dbpedia-entity'
56

6-
RERANK_TYPE=${1:-"text"} # Default to text if no argument provided
7-
8-
if [ "$RERANK_TYPE" = "code" ]; then
9-
# Code datasets to process
10-
datasets=('csn_go') # 'csn_go' 'csn_java' 'csn_python' 'csn_javascript' 'csn_php' 'csn_ruby' 'cosqa'
11-
data_type="codedataset"
12-
else
13-
# BEIR datasets to process
14-
datasets=('trec-covid') # 'climate-fever' 'fever' 'hotpotqa' 'msmarco' 'nfcorpus' 'nq' 'fiqa' 'scidocs' 'scifact' 'dbpedia-entity'
15-
data_type="beir"
16-
fi
17-
18-
for dataset in "${datasets[@]}"; do
19-
echo "Processing dataset: ${dataset} (${RERANK_TYPE} reranking)"
7+
# Iterate over datasets and process each one
8+
for datasets in "${datasets[@]}"; do
9+
echo "Processing dataset: ${datasets}"
2010

2111
if python "${REPO_DIR}/scripts/convert_results.py" \
22-
--dataset "${dataset}" \
12+
--dataset "${datasets}" \
2313
--output_dir "${output_dir}" \
24-
--data_type "${data_type}" \
14+
--data_type "beir" \
2515
--data_dir "${data_dir}" \
26-
--top_k 100 \
27-
--rerank_type "${RERANK_TYPE}"; then
28-
echo "Successfully processed ${dataset}"
16+
--top_k 100; then
17+
echo "Successfully processed ${datasets}"
2918
else
30-
echo "Failed to process ${dataset}" >&2
19+
echo "Failed to process ${datasets}" >&2
3120
exit 1
3221
fi
3322
done

bash/run_eval.sh

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,30 @@
11
#!/bin/bash
22

3+
# Check if eval_type argument is provided
34
if [ -z "$1" ]; then
4-
echo "Usage: $0 <eval_type> [rerank_type]"
5+
echo "Usage: $0 <eval_type>"
56
exit 1
67
fi
78

89
EVAL_TYPE=$1
9-
RERANK_TYPE=${2:-"text"} # Default to text if not specified
10+
DATA_DIR="${REPO_DIR}/datasets/beir/"
11+
OUTPUT_DIR="${REPO_DIR}/outputs/beir/"
1012

11-
DATA_DIR="${REPO_DIR}/datasets/"
12-
OUTPUT_DIR="${REPO_DIR}/outputs/"
13-
14-
if [ "$RERANK_TYPE" = "code" ]; then
15-
# Code datasets to process
16-
DATASETS=('csn_go') # 'csn_go' 'csn_java' 'csn_python' 'csn_javascript' 'csn_php' 'csn_ruby' 'cosqa'
17-
DATA_TYPE="codedataset"
18-
else
19-
# BEIR datasets to process
20-
DATASETS=('trec-covid') # 'climate-fever' 'fever' 'hotpotqa' 'msmarco' 'nfcorpus' 'nq' 'fiqa' 'scidocs' 'scifact' 'dbpedia-entity'
21-
DATA_TYPE="beir"
22-
fi
13+
# List of datasets to process
14+
DATASETS=('trec-covid') # 'climate-fever' 'fever' 'hotpotqa' 'msmarco' 'nfcorpus' 'nq' 'fiqa' 'scidocs' 'scifact' 'dbpedia-entity'
2315

16+
# Iterate over datasets and process each one
2417
for DATASET in "${DATASETS[@]}"; do
25-
echo "Evaluating dataset: ${DATASET} (${RERANK_TYPE} reranking)"
18+
echo "Evaluating dataset: ${DATASET}"
2619

2720
# suffix: ce -> cross encoder reranker | llm_FIRST_alpha -> FIRST Model
2821
if python "${REPO_DIR}/scripts/eval.py" \
2922
--dataset "${DATASET}" \
3023
--output_path "${OUTPUT_DIR}" \
31-
--data_type "${DATA_TYPE}" \
24+
--data_type "beir" \
3225
--suffix "llm_FIRST_alpha" \
3326
--eval_type "${EVAL_TYPE}" \
34-
--data_dir "${DATA_DIR}" \
35-
--rerank_type "${RERANK_TYPE}"; then
27+
--data_dir "${DATA_DIR}"; then
3628
echo "Successfully evaluated ${DATASET}"
3729
else
3830
echo "Failed to evaluate ${DATASET}" >&2

bash/run_rerank_llm.sh

Lines changed: 12 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,20 @@
11
#!/bin/bash
22

33
# Set directories and model
4-
DATA_DIR="${REPO_DIR}/datasets/"
5-
OUTPUT_DIR="${REPO_DIR}/outputs/"
4+
DATA_DIR="${REPO_DIR}/datasets/beir/"
5+
OUTPUT_DIR="${REPO_DIR}/outputs/beir/"
6+
MODEL_IN_USE="rryisthebest/First_Model"
67

7-
# Model configuration
8-
RERANK_TYPE=${1:-"text"} # Default to text
9-
CODE_PROMPT_TYPE=${2:-"docstring"} # Options: "docstring" or "github_issue" (only used when RERANK_TYPE=code)
10-
11-
if [ "$RERANK_TYPE" = "code" ]; then
12-
MODEL_IN_USE="cornstack/CodeRankLLM"
13-
# Code reranking doesn't support logits and alpha
14-
USE_LOGITS=0
15-
USE_ALPHA=0
16-
else
17-
MODEL_IN_USE="rryisthebest/First_Model"
18-
# Text reranking configuration
19-
USE_LOGITS=1 # Whether to use FIRST single token logit decoding
20-
USE_ALPHA=1 # Whether to use Alphabetic Identifiers
21-
fi
8+
# Configuration flags
9+
USE_LOGITS=1 # Whether to use FIRST single token logit decoding
10+
USE_ALPHA=1 # Whether to use Alphabetic Identifiers
2211

2312
# List of datasets to rerank
24-
if [ "$RERANK_TYPE" = "code" ]; then
25-
# Datasets suitable for code reranking
26-
DATASETS=('csn_ruby') # 'javascript' 'go' 'php' 'ruby' 'java' 'python' 'cosqa'
27-
else
28-
# Datasets for text reranking
29-
DATASETS=('trec-covid') # 'climate-fever' 'fever' 'hotpotqa' 'msmarco' 'nfcorpus' 'nq' 'fiqa' 'scidocs' 'scifact' 'trec-covid'
30-
fi
13+
DATASETS=('dbpedia-entity') # 'climate-fever' 'fever' 'hotpotqa' 'msmarco' 'nfcorpus' 'nq' 'fiqa' 'scidocs' 'scifact' 'trec-covid'
3114

15+
# Iterate over datasets and rerank each one
3216
for DATASET in "${DATASETS[@]}"; do
33-
echo "Reranking dataset: ${DATASET} using ${RERANK_TYPE} reranking"
17+
echo "Reranking dataset: ${DATASET}"
3418

3519
if python "${REPO_DIR}/scripts/rerank_llm.py" \
3620
--model "${MODEL_IN_USE}" \
@@ -43,12 +27,10 @@ for DATASET in "${DATASETS[@]}"; do
4327
--llm_top_k 100 \
4428
--window_size 20 \
4529
--step_size 10 \
46-
--do_batched 1 \
47-
--rerank_type "${RERANK_TYPE}" \
48-
--code_prompt_type "${CODE_PROMPT_TYPE}"; then
49-
echo "Successfully reranked ${DATASET} with ${RERANK_TYPE} reranker"
30+
--do_batched 1; then
31+
echo "Successfully reranked ${DATASET} with LLM reranker"
5032
else
51-
echo "Failed to rerank ${DATASET} with ${RERANK_TYPE} reranker" >&2
33+
echo "Failed to rerank ${DATASET} with LLM reranker" >&2
5234
exit 1
5335
fi
5436
done

0 commit comments

Comments
 (0)