cjfcsjt
/

data

Model card Files Files and versions

xet

Community

cjfcsjt committed on May 24, 2024

Commit

0a71441

verified ·

1 Parent(s): bf830a4

Upload seeclick_task_prompts.py with huggingface_hub

Browse files

Files changed (1) hide show

seeclick_task_prompts.py +267 -0

seeclick_task_prompts.py ADDED Viewed

	@@ -0,0 +1,267 @@

+# Prompt templates for locating all described elements in a webpage screenshot (bbox variant): plain instruction strings with no "{}" placeholder, each tagged "(with bbox)"; the element descriptions are presumably supplied separately -- verify against the caller.
+web_loca_all_bbox_prompt = [
+    "In the screenshot of this web page, please give me the coordinates of the element I want to click on according to my instructions (with bbox).",
+    "Based on the screenshot of the page, I give a text description and you give its corresponding location (with bbox).",
+    "In the image above, I will give a series of descriptions of the elements to be clicked. Please predict where you want to click (with bbox).",
+    "I will give textual descriptions of certain elements in the screenshot. Please predict the location of the corresponding element (with bbox).",
+    "Please identify the coordinates of the webpage elements I describe based on the provided screenshot (with bbox).",
+    "Given a screenshot, I will describe specific elements; your task is to predict their locations (with bbox).",
+    "Using the image of this webpage, can you determine the coordinates of the elements I describe (with bbox)?",
+    "In this webpage capture, I will describe certain elements. Please locate them for me (with bbox).",
+    "I'll provide textual descriptions of elements in this webpage screenshot. Can you find their coordinates (with bbox)?",
+    "From the given webpage screenshot, I need you to identify the locations of described elements (with bbox).",
+    "Based on this screenshot, I'll describe some elements. Please pinpoint their exact locations (with bbox).",
+    "For the elements I describe in this page capture, can you predict their positions (with bbox)?",
+    "I will describe elements from a webpage screenshot; your role is to locate them (with bbox).",
+    "Using the attached screenshot of a webpage, please find the coordinates of described elements (with bbox).",
+    "From the image of this webpage, I will describe elements for you to locate (with bbox).",
+    "I'll give descriptions of certain webpage elements; please identify where they are in this screenshot (with bbox).",
+    "On this webpage screenshot, I will point out elements; please predict their exact coordinates (with bbox).",
+    "In this web page image, please locate the elements as I describe them (with bbox).",
+    "Given this screenshot of a webpage, I'll describe some elements; locate them for me (with bbox).",
+    "Please use the provided webpage screenshot to locate the elements I describe (with bbox).",
+    "In the provided web page image, I'll describe specific elements. Identify their locations, please (with bbox).",
+    "With this screenshot of a webpage, can you locate the elements I describe (with bbox)?",
+    "I will describe features on this webpage screenshot; please predict their positions (with bbox).",
+    "Using the screenshot of this webpage, identify the coordinates of elements I describe (with bbox).",
+    "On this webpage capture, I'll point out specific elements for you to locate (with bbox).",
+    "Please determine the location of elements I describe in this webpage screenshot (with bbox).",
+    "I'll describe certain elements on this webpage image; your task is to find their locations (with bbox).",
+    "Using this webpage screenshot, I'll describe some elements. Please locate them (with bbox).",
+    "Based on my descriptions, find the locations of elements in this webpage screenshot (with bbox).",
+    "In this web page capture, please predict the positions of elements I describe (with bbox).",
+    "I'll give textual clues about elements in this webpage screenshot; identify their coordinates (with bbox).",
+    "Using the provided screenshot, I'll describe webpage elements for you to locate (with bbox).",
+    "From this webpage image, I will describe specific elements. Please predict their exact locations (with bbox)."
+]
+# Prompt templates for locating all described elements in a webpage screenshot (point variant): plain instruction strings with no "{}" placeholder, each tagged "(with point)"; the element descriptions are presumably supplied separately -- verify against the caller.
+web_loca_all_point_prompt = [
+    "In the screenshot of this web page, please give me the coordinates of the element I want to click on according to my instructions (with point).",
+    "Based on the screenshot of the page, I give a text description and you give its corresponding location (with point).",
+    "In the image above, I will give a series of descriptions of the elements to be clicked. Please predict where you want to click (with point).",
+    "I will give textual descriptions of certain elements in the screenshot. Please predict the location of the corresponding element (with point).",
+    "Please identify the coordinates of the webpage elements I describe based on the provided screenshot (with point).",
+    "Given a screenshot, I will describe specific elements; your task is to predict their locations (with point).",
+    "Using the image of this webpage, can you determine the coordinates of the elements I describe (with point)?",
+    "In this webpage capture, I will describe certain elements. Please locate them for me (with point).",
+    "I'll provide textual descriptions of elements in this webpage screenshot. Can you find their coordinates (with point)?",
+    "From the given webpage screenshot, I need you to identify the locations of described elements (with point).",
+    "Based on this screenshot, I'll describe some elements. Please pinpoint their exact locations (with point).",
+    "For the elements I describe in this page capture, can you predict their positions (with point)?",
+    "I will describe elements from a webpage screenshot; your role is to locate them (with point).",
+    "Using the attached screenshot of a webpage, please find the coordinates of described elements (with point).",
+    "From the image of this webpage, I will describe elements for you to locate (with point).",
+    "I'll give descriptions of certain webpage elements; please identify where they are in this screenshot (with point).",
+    "On this webpage screenshot, I will point out elements; please predict their exact coordinates (with point).",
+    "In this web page image, please locate the elements as I describe them (with point).",
+    "Given this screenshot of a webpage, I'll describe some elements; locate them for me (with point).",
+    "Please use the provided webpage screenshot to locate the elements I describe (with point).",
+    "In the provided web page image, I'll describe specific elements. Identify their locations, please (with point).",
+    "With this screenshot of a webpage, can you locate the elements I describe (with point)?",
+    "I will describe features on this webpage screenshot; please predict their positions (with point).",
+    "Using the screenshot of this webpage, identify the coordinates of elements I describe (with point).",
+    "On this webpage capture, I'll point out specific elements for you to locate (with point).",
+    "Please determine the location of elements I describe in this webpage screenshot (with point).",
+    "I'll describe certain elements on this webpage image; your task is to find their locations (with point).",
+    "Using this webpage screenshot, I'll describe some elements. Please locate them (with point).",
+    "Based on my descriptions, find the locations of elements in this webpage screenshot (with point).",
+    "In this web page capture, please predict the positions of elements I describe (with point).",
+    "I'll give textual clues about elements in this webpage screenshot; identify their coordinates (with point).",
+    "Using the provided screenshot, I'll describe webpage elements for you to locate (with point).",
+    "From this webpage image, I will describe specific elements. Please predict their exact locations (with point)."
+]
+# Prompt templates for OCR of all elements in a webpage screenshot (bbox variant): the user gives coordinates and asks for the text of the element there; plain strings with no "{}" placeholder, each tagged "(with bbox)".
+web_ocr_all_bbox_prompt = [
+    "Based on the screenshot of the web page, I give you the location to click on and you predict the text content of the corresponding element (with bbox).",
+    "In the image above, I give a series of coordinates and ask you to describe the corresponding elements (with bbox).",
+    "On this page, I will give you a series of coordinates and ask you to predict the text of the clickable element that corresponds to these coordinates (with bbox).",
+    "Given a webpage screenshot, I provide coordinates; predict the text content of the elements at these locations (with bbox).",
+    "In this screenshot, I'll give coordinates and ask you to describe the text of the elements there (with bbox).",
+    "Using the provided image of the webpage, I'll specify locations; you predict the text content of those elements (with bbox).",
+    "With this webpage capture, I provide a series of coordinates; please identify the text content of each element (with bbox).",
+    "In this page image, I'll point to specific locations; you need to predict the text of the corresponding elements (with bbox).",
+    "From this screenshot, I'll give coordinates; can you describe the text of the elements at these points (with bbox)?",
+    "Based on this web page screenshot, I provide coordinates; please predict the textual content at these spots (with bbox).",
+    "Using the given image of the webpage, I'll specify certain coordinates; describe the text of the elements there (with bbox).",
+    "On this captured webpage, I will give a series of coordinates; your task is to predict the text at these locations (with bbox).",
+    "With this webpage image, I provide coordinates; can you tell me the text of the elements at these points (with bbox)?",
+    "In the provided webpage screenshot, I'll point out locations; please describe the text of the elements there (with bbox).",
+    "From this web page capture, I give specific coordinates; predict the text content of the elements at these locations (with bbox).",
+    "Using this screenshot of a webpage, I'll indicate coordinates; can you predict the text of the elements (with bbox)?",
+    "On this image of a web page, I provide coordinates; you need to describe the text of the corresponding elements (with bbox).",
+    "Given this webpage capture, I'll specify locations; please predict the text content of the elements there (with bbox).",
+    "In this screenshot, I give a series of coordinates; your task is to predict the text content of the elements (with bbox).",
+    "From the given webpage image, I'll provide coordinates; can you describe the text of the elements at these points (with bbox)?",
+    "On this captured webpage, I provide specific coordinates; you need to predict the text of the elements there (with bbox).",
+    "Using this web page screenshot, I'll indicate locations; please describe the text content of the elements (with bbox).",
+    "With this image of a webpage, I specify coordinates; your task is to predict the text of the corresponding elements (with bbox).",
+    "In this webpage capture, I'll give coordinates; can you predict the text content of the elements at these locations (with bbox)?",
+    "Based on this screenshot, I provide a series of coordinates; describe the text of the elements there (with bbox).",
+    "Using the image of this webpage, I'll specify locations; you need to predict the text of the elements (with bbox).",
+    "On this page screenshot, I give coordinates; please predict the text content of the corresponding elements (with bbox).",
+    "From this webpage image, I'll indicate specific coordinates; can you describe the text of the elements (with bbox)?",
+    "In this web page image, I provide coordinates; your task is to predict the text of the elements at these locations (with bbox).",
+    "Given this screenshot of a webpage, I specify locations; please describe the text of the elements there (with bbox).",
+    "Using the provided page image, I'll point to locations; you predict the text content of the elements (with bbox).",
+    "On this webpage capture, I provide a series of coordinates; can you predict the text of the elements (with bbox)?",
+    "With this image of the web page, I give specific coordinates; your task is to describe the text of the elements at these points (with bbox)."
+]
+# Prompt templates for OCR of all elements in a webpage screenshot (point variant): the user gives coordinates and asks for the text of the element there; plain strings with no "{}" placeholder, each tagged "(with point)".
+web_ocr_all_point_prompt = [
+    "Based on the screenshot of the web page, I give you the location to click on and you predict the text content of the corresponding element (with point).",
+    "In the image above, I give a series of coordinates and ask you to describe the corresponding elements (with point).",
+    "On this page, I will give you a series of coordinates and ask you to predict the text of the clickable element that corresponds to these coordinates (with point).",
+    "Given a webpage screenshot, I provide coordinates; predict the text content of the elements at these locations (with point).",
+    "In this screenshot, I'll give coordinates and ask you to describe the text of the elements there (with point).",
+    "Using the provided image of the webpage, I'll specify locations; you predict the text content of those elements (with point).",
+    "With this webpage capture, I provide a series of coordinates; please identify the text content of each element (with point).",
+    "In this page image, I'll point to specific locations; you need to predict the text of the corresponding elements (with point).",
+    "From this screenshot, I'll give coordinates; can you describe the text of the elements at these points (with point)?",
+    "Based on this web page screenshot, I provide coordinates; please predict the textual content at these spots (with point).",
+    "Using the given image of the webpage, I'll specify certain coordinates; describe the text of the elements there (with point).",
+    "On this captured webpage, I will give a series of coordinates; your task is to predict the text at these locations (with point).",
+    "With this webpage image, I provide coordinates; can you tell me the text of the elements at these points (with point)?",
+    "In the provided webpage screenshot, I'll point out locations; please describe the text of the elements there (with point).",
+    "From this web page capture, I give specific coordinates; predict the text content of the elements at these locations (with point).",
+    "Using this screenshot of a webpage, I'll indicate coordinates; can you predict the text of the elements (with point)?",
+    "On this image of a web page, I provide coordinates; you need to describe the text of the corresponding elements (with point).",
+    "Given this webpage capture, I'll specify locations; please predict the text content of the elements there (with point).",
+    "In this screenshot, I give a series of coordinates; your task is to predict the text content of the elements (with point).",
+    "From the given webpage image, I'll provide coordinates; can you describe the text of the elements at these points (with point)?",
+    "On this captured webpage, I provide specific coordinates; you need to predict the text of the elements there (with point).",
+    "Using this web page screenshot, I'll indicate locations; please describe the text content of the elements (with point).",
+    "With this image of a webpage, I specify coordinates; your task is to predict the text of the corresponding elements (with point).",
+    "In this webpage capture, I'll give coordinates; can you predict the text content of the elements at these locations (with point)?",
+    "Based on this screenshot, I provide a series of coordinates; describe the text of the elements there (with point).",
+    "Using the image of this webpage, I'll specify locations; you need to predict the text of the elements (with point).",
+    "On this page screenshot, I give coordinates; please predict the text content of the corresponding elements (with point).",
+    "From this webpage image, I'll indicate specific coordinates; can you describe the text of the elements (with point)?",
+    "In this web page image, I provide coordinates; your task is to predict the text of the elements at these locations (with point).",
+    "Given this screenshot of a webpage, I specify locations; please describe the text of the elements there (with point).",
+    "Using the provided page image, I'll point to locations; you predict the text content of the elements (with point).",
+    "On this webpage capture, I provide a series of coordinates; can you predict the text of the elements (with point)?",
+    "With this image of the web page, I give specific coordinates; your task is to describe the text of the elements at these points (with point)."
+]
+# Locate a screen element (bbox variant).
+# Every template has exactly one "{}" placeholder for the instruction text
+# (presumably filled with str.format -- verify against the caller) and asks
+# for the answer as a bounding box via the "(with bbox)" tag.
+# The "In this layout" template used to ask for "the upload button" whatever
+# the instruction was; it now asks for "the corresponding button".
+# NOTE(review): a few templates are ungrammatical (e.g. "where do I access
+# to"); they are kept verbatim so the prompt text changes as little as possible.
+loca_bbox_prompt = [
+    "In this UI screenshot, what is the position of the element corresponding to the command \"{}\" (with bbox)?",
+    "In the UI, where should I click if I want to complete instruction \"{}\" (with bbox)?",
+    "In this screen, how can I navigate to the section that says \"{}\" (with bbox)?",
+    "On this page, what is the location of the button do I press to follow the command \"{}\" (with bbox)?",
+    "For the action described as \"{}\", where is the corresponding icon in this UI (with bbox)?",
+    "To execute the function \"{}\", which item in the UI should I select (in coordinates) (with bbox)?",
+    "In this UI layout, where is the tool that performs the operation \"{}\" (with bbox)?",
+    "On this screen, where can I find the feature that allows me to \"{}\" (with bbox)?",
+    "In the software interface, which menu item corresponds to the task \"{}\" (in coordinates) (with bbox)?",
+    "Within this dashboard, which widget should I interact with to \"{}\" (with bbox)?",
+    "In the UI here, I need to {}, what is the coordinates of the element is related to this (with bbox)?",
+    "If my goal is to \"{}\", which control in this interface should I use (with bbox)?",
+    "On this device screen, to achieve the outcome \"{}\", where do I tap (with bbox)?",
+    "Facing this interface, where do I access to \"{}\" (with bbox)?",
+    "In this digital interface, to initiate \"{}\", where is my point of interest (with bbox)?",
+    "When using this app, for the function \"{}\", where is the command located (with bbox)?",
+    "In this UI design, to process the instruction \"{}\", where should I activate (with bbox)?",
+    "Within this graphical user interface, to \"{}\", which icon should I be looking for (with bbox)?",
+    "On this web page, to perform \"{}\", where is the link or button I will click (with bbox)?",
+    "In this interface snapshot, to begin \"{}\", what is the clicking point (with bbox)?",
+    "When interacting with this UI, for the operation labeled \"{}\", what is my target (with bbox)?",
+    "On this software's interface, to execute the step \"{}\", where do I direct my attention (with bbox)?",
+    "In the current UI, I want to {}, where should I click (with bbox)?",
+    "In this image, I want to {}, where should I click on (with bbox)?",
+    "In the current UI, to {}, where should I click (with bbox)?",
+    "In this image, to {}, where should I click on (with bbox)?",
+    "On this screen, I need to {}, where do I click (with bbox)?",
+    "In the UI right now, to {}, where should I click (with bbox)?",
+    "In this layout, I want to {}, where is the corresponding button (with bbox)?",
+    "On this interface, to {}, where should I click (with bbox)?",
+    "In this view, I need to {}, which icon do I select (in coordinates) (with bbox)?",
+    "On this page, I want to {}, where is the option (with bbox)?",
+    "In this webpage, I'm trying to {}, where do I click (with bbox)?",
+    "In this software, to {}, where should I navigate (with bbox)?"
+]
+# Locate a screen element (point variant).
+# Every template has exactly one "{}" placeholder for the instruction text
+# (presumably filled with str.format -- verify against the caller) and asks
+# for the answer as a point via the "(with point)" tag.
+# The "In this layout" template used to ask for "the upload button" whatever
+# the instruction was; it now asks for "the corresponding button".
+# NOTE(review): a few templates are ungrammatical (e.g. "where do I access
+# to"); they are kept verbatim so the prompt text changes as little as possible.
+loca_point_prompt = [
+    "In this UI screenshot, what is the position of the element corresponding to the command \"{}\" (with point)?",
+    "In the UI, where should I click if I want to complete instruction \"{}\" (with point)?",
+    "In this screen, how can I navigate to the section that says \"{}\" (with point)?",
+    "On this page, what is the location of the button do I press to follow the command \"{}\" (with point)?",
+    "For the action described as \"{}\", where is the corresponding icon in this UI (with point)?",
+    "To execute the function \"{}\", which item in the UI should I select (in coordinates) (with point)?",
+    "In this UI layout, where is the tool that performs the operation \"{}\" (with point)?",
+    "On this screen, where can I find the feature that allows me to \"{}\" (with point)?",
+    "In the software interface, which menu item corresponds to the task \"{}\" (in coordinates) (with point)?",
+    "Within this dashboard, which widget should I interact with to \"{}\" (with point)?",
+    "In the UI here, I need to {}, what is the coordinates of the element is related to this (with point)?",
+    "If my goal is to \"{}\", which control in this interface should I use (with point)?",
+    "On this device screen, to achieve the outcome \"{}\", where do I tap (with point)?",
+    "Facing this interface, where do I access to \"{}\" (with point)?",
+    "In this digital interface, to initiate \"{}\", where is my point of interest (with point)?",
+    "When using this app, for the function \"{}\", where is the command located (with point)?",
+    "In this UI design, to process the instruction \"{}\", where should I activate (with point)?",
+    "Within this graphical user interface, to \"{}\", which icon should I be looking for (with point)?",
+    "On this web page, to perform \"{}\", where is the link or button I will click (with point)?",
+    "In this interface snapshot, to begin \"{}\", what is the clicking point (with point)?",
+    "When interacting with this UI, for the operation labeled \"{}\", what is my target (with point)?",
+    "On this software's interface, to execute the step \"{}\", where do I direct my attention (with point)?",
+    "In the current UI, I want to {}, where should I click (with point)?",
+    "In this image, I want to {}, where should I click on (with point)?",
+    "In the current UI, to {}, where should I click (with point)?",
+    "In this image, to {}, where should I click on (with point)?",
+    "On this screen, I need to {}, where do I click (with point)?",
+    "In the UI right now, to {}, where should I click (with point)?",
+    "In this layout, I want to {}, where is the corresponding button (with point)?",
+    "On this interface, to {}, where should I click (with point)?",
+    "In this view, I need to {}, which icon do I select (in coordinates) (with point)?",
+    "On this page, I want to {}, where is the option (with point)?",
+    "In this webpage, I'm trying to {}, where do I click (with point)?",
+    "In this software, to {}, where should I navigate (with point)?"
+]
+# Prompt templates for screen captioning: paraphrased requests to describe an interface screenshot; plain strings with no "{}" placeholder.
+screen_caption_prompt = [
+    "Can you provide a detailed description of the interface screenshot shown?",
+    "Illustrate the details visible in the provided screenshot.",
+    "What does the presented screen image depict?",
+    "How would you narrate the contents of this screen capture to someone who can't see it?",
+    "Please detail the elements shown in the interface screenshot.",
+    "Describe the features and information displayed in this screenshot.",
+    "Elaborate on what is visible in the screenshot of the interface.",
+    "Give a comprehensive description of the screenshot's interface.",
+    "What information is conveyed in the screenshot displayed?",
+    "Could you depict the content and layout of the screen image provided?",
+    "Explain the visual aspects of the screenshot taken from this interface.",
+    "How would you verbally depict the interface shown in the screenshot?",
+    "What key elements are shown in this interface screenshot?",
+    "Provide a verbal representation of the screenshot's content.",
+    "Narrate the components and information visible in this interface capture.",
+    "What are the main features displayed in the screenshot of this screen?",
+    "Outline the specific details shown in the interface image.",
+    "How would you describe this screen image to someone who cannot see it?",
+    "Enumerate the elements and information present in the provided interface screenshot.",
+    "Detail the visual composition of the screen capture you see."
+]
+# Prompt templates for widget captioning: each string has exactly one "{}" placeholder standing for the element's position (presumably filled with str.format -- verify against the caller).
+widgetcap_prompt = [
+    "Please generate a description for the element at {}.",
+    "Describe the function of the element at {} on the screen.",
+    "What is the function of the element at {} on the UI?",
+    "What happens when you tap position {} on the screen?",
+    "What happens when you click point {} on the screen?",
+    "Can you explain what the user interface element at {} does?",
+    "What action is triggered by interacting with the area at {}?",
+    "Explain the purpose of the interactive element found at {}.",
+    "What feature is accessed by selecting the location at {}?",
+    "Identify and describe the component located at {}.",
+    "What is the outcome of selecting the element at {}?",
+    "Detail the functionality of the UI element positioned at {}.",
+    "What is the significance of the element located at {} in the application?",
+    "How does the element at {} contribute to the overall user experience?",
+    "What kind of input or interaction is expected at the point marked {}?"
+]