1
1
#! /bin/bash
2
2
3
3
# Set directories and model
4
- DATA_DIR=" ${REPO_DIR} /datasets/"
5
- OUTPUT_DIR=" ${REPO_DIR} /outputs/"
4
+ DATA_DIR=" ${REPO_DIR} /datasets/beir/"
5
+ OUTPUT_DIR=" ${REPO_DIR} /outputs/beir/"
6
+ MODEL_IN_USE=" rryisthebest/First_Model"
6
7
7
- # Model configuration
8
- RERANK_TYPE=${1:- " text" } # Default to text
9
- CODE_PROMPT_TYPE=${2:- " docstring" } # Options: "docstring" or "github_issue" (only used when RERANK_TYPE=code)
10
-
11
- if [ " $RERANK_TYPE " = " code" ]; then
12
- MODEL_IN_USE=" cornstack/CodeRankLLM"
13
- # Code reranking doesn't support logits and alpha
14
- USE_LOGITS=0
15
- USE_ALPHA=0
16
- else
17
- MODEL_IN_USE=" rryisthebest/First_Model"
18
- # Text reranking configuration
19
- USE_LOGITS=1 # Whether to use FIRST single token logit decoding
20
- USE_ALPHA=1 # Whether to use Alphabetic Identifiers
21
- fi
8
+ # Configuration flags
9
+ USE_LOGITS=1 # Whether to use FIRST single token logit decoding
10
+ USE_ALPHA=1 # Whether to use Alphabetic Identifiers
22
11
23
12
# List of datasets to rerank
24
- if [ " $RERANK_TYPE " = " code" ]; then
25
- # Datasets suitable for code reranking
26
- DATASETS=(' csn_ruby' ) # 'javascript' 'go' 'php' 'ruby' 'java' 'python' 'cosqa'
27
- else
28
- # Datasets for text reranking
29
- DATASETS=(' trec-covid' ) # 'climate-fever' 'fever' 'hotpotqa' 'msmarco' 'nfcorpus' 'nq' 'fiqa' 'scidocs' 'scifact' 'trec-covid'
30
- fi
13
+ DATASETS=(' dbpedia-entity' ) # 'climate-fever' 'fever' 'hotpotqa' 'msmarco' 'nfcorpus' 'nq' 'fiqa' 'scidocs' 'scifact' 'trec-covid'
31
14
15
+ # Iterate over datasets and rerank each one
32
16
for DATASET in " ${DATASETS[@]} " ; do
33
- echo " Reranking dataset: ${DATASET} using ${RERANK_TYPE} reranking "
17
+ echo " Reranking dataset: ${DATASET} "
34
18
35
19
if python " ${REPO_DIR} /scripts/rerank_llm.py" \
36
20
--model " ${MODEL_IN_USE} " \
@@ -43,12 +27,10 @@ for DATASET in "${DATASETS[@]}"; do
43
27
--llm_top_k 100 \
44
28
--window_size 20 \
45
29
--step_size 10 \
46
- --do_batched 1 \
47
- --rerank_type " ${RERANK_TYPE} " \
48
- --code_prompt_type " ${CODE_PROMPT_TYPE} " ; then
49
- echo " Successfully reranked ${DATASET} with ${RERANK_TYPE} reranker"
30
+ --do_batched 1; then
31
+ echo " Successfully reranked ${DATASET} with LLM reranker"
50
32
else
51
- echo " Failed to rerank ${DATASET} with ${RERANK_TYPE} reranker" >&2
33
+ echo " Failed to rerank ${DATASET} with LLM reranker" >&2
52
34
exit 1
53
35
fi
54
36
done
0 commit comments